<!DOCTYPE html>



  


<html class="theme-next gemini use-motion" lang="zh-CN">
<head><meta name="generator" content="Hexo 3.8.0">
  <meta charset="UTF-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<!-- Viewport: no maximum-scale cap, so users can still pinch-zoom the page. -->
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="theme-color" content="#222">









<meta http-equiv="Cache-Control" content="no-transform">
<meta http-equiv="Cache-Control" content="no-siteapp">
















  
  
  <link href="/hcigmoid/lib/fancybox/source/jquery.fancybox.css?v=2.1.5" rel="stylesheet" type="text/css">




  
  
  
  

  
    
    
  

  
    
      
    

    
  

  

  

  

  
    
    
    <!-- Lato web font: explicit https, family requested once, ampersand escaped inside the attribute. -->
    <link href="https://fonts.googleapis.com/css?family=Lato:300,300italic,400,400italic,700,700italic&amp;subset=latin,latin-ext" rel="stylesheet" type="text/css">
  






<link href="/hcigmoid/lib/font-awesome/css/font-awesome.min.css?v=4.6.2" rel="stylesheet" type="text/css">

<link href="/hcigmoid/css/main.css?v=5.1.4" rel="stylesheet" type="text/css">


  <link rel="apple-touch-icon" sizes="180x180" href="/hcigmoid/images/apple-touch-icon-next.png?v=5.1.4">


  <link rel="icon" type="image/png" sizes="32x32" href="/hcigmoid/images/favicon-32x32-next.png?v=5.1.4">


  <link rel="icon" type="image/png" sizes="16x16" href="/hcigmoid/images/favicon-16x16-next.png?v=5.1.4">


  <link rel="mask-icon" href="/hcigmoid/images/logo.svg?v=5.1.4" color="#222">





  <meta name="keywords" content="Hexo, NexT">





  <link rel="alternate" href="/hcigmoid/atom.xml" title="HCigmoid" type="application/atom+xml">






<meta name="description" content="总结心得">
<meta name="keywords" content="feature, model, algorithm">
<meta property="og:type" content="website">
<meta property="og:title" content="HCigmoid">
<meta property="og:url" content="http://guyuecanhui.gitee.io/hcigmoid/page/3/index.html">
<meta property="og:site_name" content="HCigmoid">
<meta property="og:description" content="总结心得">
<!-- Open Graph locale uses the language_TERRITORY form; the page content is Simplified Chinese. -->
<meta property="og:locale" content="zh_CN">
<meta name="twitter:card" content="summary">
<meta name="twitter:title" content="HCigmoid">
<meta name="twitter:description" content="总结心得">



<script type="text/javascript" id="hexo.configurations">
  // Inline configuration globals for this page.
  // NOTE(review): presumably consumed by theme scripts loaded elsewhere in the
  // document; those scripts are not visible in this part of the file.

  // Reuse an existing window.NexT object if one is already defined, else start empty.
  var NexT = window.NexT || {};

  // Settings literal; every value below is a constant written into the page.
  var CONFIG = {
    // Site root path; same /hcigmoid/ prefix as the asset and post URLs on this page.
    root: '/hcigmoid/',
    // Scheme name; the <html> element carries the matching "gemini" class.
    scheme: 'Gemini',
    // Version string; matches the ?v=5.1.4 query on main.css and the icon links.
    version: '5.1.4',
    // Sidebar options; "left" agrees with the container's sidebar-position-left class.
    sidebar: {"position":"left","display":"post","offset":12,"b2t":false,"scrollpercent":false,"onmobile":false},
    // Fancybox flag; the fancybox stylesheet is linked in <head>.
    fancybox: true,
    tabs: true,
    // Motion settings: enable flag plus a named transition per page region.
    motion: {"enable":true,"async":false,"transition":{"post_block":"fadeIn","post_header":"slideDownIn","post_body":"slideDownIn","coll_header":"slideLeftIn","sidebar":"slideUpIn"}},
    // Duoshuo comment settings (placeholder-looking values — TODO confirm whether still used).
    duoshuo: {
      userId: '0',
      author: '博主'
    },
    // Algolia search settings; the credentials are empty strings here, and the
    // page markup contains a local-search popup instead.
    algolia: {
      applicationID: '',
      apiKey: '',
      indexName: '',
      hits: {"per_page":10},
      // UI strings; ${query}, ${hits} and ${time} are literal placeholders in these strings.
      labels: {"input_placeholder":"Search for Posts","hits_empty":"We didn't find any results for the search: ${query}","hits_stats":"${hits} results found in ${time} ms"}
    }
  };
</script>



  <link rel="canonical" href="http://guyuecanhui.gitee.io/hcigmoid/page/3/">





  <title>HCigmoid</title>
  








</head>

<body itemscope itemtype="http://schema.org/WebPage" lang="zh-CN">

  
  
    
  

  <div class="container sidebar-position-left 
  page-home">
    <div class="headband"></div>

    <header id="header" class="header" itemscope itemtype="http://schema.org/WPHeader">
      <div class="header-inner"><div class="site-brand-wrapper">
  <div class="site-meta ">
    

    <div class="custom-logo-site-title">
      <a href="/hcigmoid/" class="brand" rel="start">
        <span class="logo-line-before"><i></i></span>
        <span class="site-title">HCigmoid</span>
        <span class="logo-line-after"><i></i></span>
      </a>
    </div>
      
        <p class="site-subtitle">Watch, learn and practise</p>
      
  </div>

  <!-- Navigation toggle: the button shows only three bars, so it needs an explicit accessible name. -->
  <div class="site-nav-toggle">
    <button type="button" aria-label="切换导航栏">
      <span class="btn-bar"></span>
      <span class="btn-bar"></span>
      <span class="btn-bar"></span>
    </button>
  </div>
</div>

<nav class="site-nav">
  

  
    <ul id="menu" class="menu">
      
        
        <li class="menu-item menu-item-home">
          <a href="/hcigmoid/" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-home"></i> <br>
            
            首页
          </a>
        </li>
      
        
        <li class="menu-item menu-item-about">
          <a href="/hcigmoid/about/" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-user"></i> <br>
            
            关于
          </a>
        </li>
      
        
        <li class="menu-item menu-item-tags">
          <a href="/hcigmoid/tags/" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-tags"></i> <br>
            
            标签
          </a>
        </li>
      
        
        <li class="menu-item menu-item-categories">
          <a href="/hcigmoid/categories/" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-th"></i> <br>
            
            分类
          </a>
        </li>
      
        
        <li class="menu-item menu-item-archives">
          <a href="/hcigmoid/archives/" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-archive"></i> <br>
            
            归档
          </a>
        </li>
      

      
        <li class="menu-item menu-item-search">
          
            <a href="javascript:;" class="popup-trigger">
          
            
              <i class="menu-item-icon fa fa-search fa-fw"></i> <br>
            
            搜索
          </a>
        </li>
      
    </ul>
  

  
    <div class="site-search">
      
  <div class="popup search-popup local-search-popup">
  <div class="local-search-header clearfix">
    <span class="search-icon">
      <i class="fa fa-search"></i>
    </span>
    <span class="popup-btn-close">
      <i class="fa fa-times-circle"></i>
    </span>
    <!-- Search field: a placeholder is not a label, so an accessible name is set explicitly. -->
    <div class="local-search-input-wrapper">
      <input autocomplete="off" placeholder="搜索..." spellcheck="false" type="text" id="local-search-input" aria-label="搜索">
    </div>
  </div>
  <div id="local-search-result"></div>
</div>



    </div>
  
</nav>



 </div>
    </header>

    <main id="main" class="main">
      <div class="main-inner">
        <div class="content-wrap">
          <div id="content" class="content">
            
  <section id="posts" class="posts-expand">
    
      

  

  
  
  

  <article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
  
  
  
  <div class="post-block">
    <!-- Absolute permalink of this post; the /hcigmoid/ root appears once, matching the title link below. -->
    <link itemprop="mainEntityOfPage" href="http://guyuecanhui.gitee.io/hcigmoid/2019/11/16/paper-2018-google-mmoe/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="name" content="古月残辉">
      <meta itemprop="description" content>
      <meta itemprop="image" content="/hcigmoid/images/avatar.gif">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="HCigmoid">
    </span>

    
      <header class="post-header">

        
        
          <h1 class="post-title" itemprop="name headline">
                
                <a class="post-title-link" href="/hcigmoid/2019/11/16/paper-2018-google-mmoe/" itemprop="url">MMoE 论文精读</a></h1>
        

        <div class="post-meta">
          <span class="post-time">
            
              <span class="post-meta-item-icon">
                <i class="fa fa-calendar-o"></i>
              </span>
              
                <span class="post-meta-item-text">发表于</span>
              
              <time title="创建于" itemprop="dateCreated datePublished" datetime="2019-11-16T09:42:59+08:00">
                2019-11-16
              </time>
            

            

            
          </span>

          
            <span class="post-category">
            
              <span class="post-meta-divider">|</span>
            
              <span class="post-meta-item-icon">
                <i class="fa fa-folder-o"></i>
              </span>
              
                <span class="post-meta-item-text">分类于</span>
              
              
                <span itemprop="about" itemscope itemtype="http://schema.org/Thing">
                  <a href="/hcigmoid/categories/论文精读/" itemprop="url" rel="index">
                    <span itemprop="name">论文精读</span>
                  </a>
                </span>

                
                
              
            </span>
          

          
            
              <span class="post-comments-count">
                <span class="post-meta-divider">|</span>
                <span class="post-meta-item-icon">
                  <i class="fa fa-comment-o"></i>
                </span>
                <a href="/hcigmoid/2019/11/16/paper-2018-google-mmoe/#comments" itemprop="discussionUrl">
                  <span class="post-comments-count valine-comment-count" data-xid="/hcigmoid/2019/11/16/paper-2018-google-mmoe/" itemprop="commentCount"></span>
                </a>
              </span>
            
          

          
          

          

          
            <div class="post-wordcount">
              
                
                <span class="post-meta-item-icon">
                  <i class="fa fa-file-word-o"></i>
                </span>
                
                  <span class="post-meta-item-text">字数统计&#58;</span>
                
                <span title="字数统计">
                  914
                </span>
              

              
                <span class="post-meta-divider">|</span>
              

              
                <span class="post-meta-item-icon">
                  <i class="fa fa-clock-o"></i>
                </span>
                
                  <span class="post-meta-item-text">阅读时长 &asymp;</span>
                
                <span title="阅读时长">
                  3
                </span>
              
            </div>
          

          

        </div>
      </header>
    

    
    
    
    <div class="post-body" itemprop="articleBody">

      
      

      
        
          <div class="note success"><p><strong>论文引用:</strong> Ma, Jiaqi , et al. “Modeling Task Relationships in Multi-task Learning with Multi-gate Mixture-of-Experts.” <em>the 24th ACM SIGKDD International Conference</em> ACM, 2018. </p></div>
<p>本文是 Google 发表在 <strong>KDD 2018</strong> 的论文，不过感觉少了一些工程的加持，内容略显单薄。文章主要提出了一种多专家子网的结构，显式的从数据中学习多个任务之间的关系，并能够通过门限网络对每个任务进行单独的优化。与传统的 <strong>share-bottom</strong> 结构相比，这种结构在任务之间关联较弱时，仍然能够取得比较好的效果。</p>
          <!--noindex-->
          <div class="post-button text-center">
            <a class="btn" href="/hcigmoid/2019/11/16/paper-2018-google-mmoe/#more" rel="contents">
              阅读全文 &raquo;
            </a>
          </div>
          <!--/noindex-->
        
      
    </div>
    
    
    

    

    

    

    <footer class="post-footer">
      

      

      

      
      
        <div class="post-eof"></div>
      
    </footer>
  </div>
  
  
  
  </article>


    
      

  

  
  
  

  <article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
  
  
  
  <div class="post-block">
    <!-- Absolute permalink of this post; the /hcigmoid/ root appears once and the space in the slug is percent-encoded. -->
    <link itemprop="mainEntityOfPage" href="http://guyuecanhui.gitee.io/hcigmoid/2019/11/16/paper-2018-google-mmoe%202/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="name" content="古月残辉">
      <meta itemprop="description" content>
      <meta itemprop="image" content="/hcigmoid/images/avatar.gif">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="HCigmoid">
    </span>

    
      <header class="post-header">

        
        
          <h1 class="post-title" itemprop="name headline">
                
                <a class="post-title-link" href="/hcigmoid/2019/11/16/paper-2018-google-mmoe 2/" itemprop="url">MMoE 论文精读</a></h1>
        

        <div class="post-meta">
          <span class="post-time">
            
              <span class="post-meta-item-icon">
                <i class="fa fa-calendar-o"></i>
              </span>
              
                <span class="post-meta-item-text">发表于</span>
              
              <time title="创建于" itemprop="dateCreated datePublished" datetime="2019-11-16T09:42:59+08:00">
                2019-11-16
              </time>
            

            

            
          </span>

          
            <span class="post-category">
            
              <span class="post-meta-divider">|</span>
            
              <span class="post-meta-item-icon">
                <i class="fa fa-folder-o"></i>
              </span>
              
                <span class="post-meta-item-text">分类于</span>
              
              
                <span itemprop="about" itemscope itemtype="http://schema.org/Thing">
                  <a href="/hcigmoid/categories/论文精读/" itemprop="url" rel="index">
                    <span itemprop="name">论文精读</span>
                  </a>
                </span>

                
                
              
            </span>
          

          
            
              <span class="post-comments-count">
                <span class="post-meta-divider">|</span>
                <span class="post-meta-item-icon">
                  <i class="fa fa-comment-o"></i>
                </span>
                <a href="/hcigmoid/2019/11/16/paper-2018-google-mmoe 2/#comments" itemprop="discussionUrl">
                  <span class="post-comments-count valine-comment-count" data-xid="/hcigmoid/2019/11/16/paper-2018-google-mmoe 2/" itemprop="commentCount"></span>
                </a>
              </span>
            
          

          
          

          

          
            <div class="post-wordcount">
              
                
                <span class="post-meta-item-icon">
                  <i class="fa fa-file-word-o"></i>
                </span>
                
                  <span class="post-meta-item-text">字数统计&#58;</span>
                
                <span title="字数统计">
                  914
                </span>
              

              
                <span class="post-meta-divider">|</span>
              

              
                <span class="post-meta-item-icon">
                  <i class="fa fa-clock-o"></i>
                </span>
                
                  <span class="post-meta-item-text">阅读时长 &asymp;</span>
                
                <span title="阅读时长">
                  3
                </span>
              
            </div>
          

          

        </div>
      </header>
    

    
    
    
    <div class="post-body" itemprop="articleBody">

      
      

      
        
          
            <blockquote>
<p>论文引用: Ma, Jiaqi , et al. “Modeling Task Relationships in Multi-task Learning with Multi-gate Mixture-of-Experts.” <em>the 24th ACM SIGKDD International Conference</em> ACM, 2018.</p>
</blockquote>
<p>本文是 Google 发表在 <strong>KDD 2018</strong> 的论文，不过感觉少了一些工程的加持，内容略显单薄。文章主要提出了一种多专家子网的结构，显式的从数据中学习多个任务之间的关系，并能够通过门限网络对每个任务进行单独的优化。与传统的 <strong>share-bottom</strong> 结构相比，这种结构在任务之间关联较弱时，仍然能够取得比较好的效果。</p>
<p>近年来，在推荐领域逐渐引入多任务学习来减轻一些使用单个模型指标可能带来的负面影响。例如在视频推荐中，只考虑点击转化率时，会倾向推荐包含标题党、擦边海报的视频；只考虑完成度时，会倾向推荐时间比较短的视频等等。而这些倾向都会影响用户体验，并且可能导致业务长期目标的下降。因此，大家开始尝试引入多个相互关联但又不一致的目标来进行综合考虑建模，并且实践表示，多任务学习在推荐系统中能够提升上下文推荐的效果。</p>
<p>传统的基于神经网络的多任务学习大致分为两类，一类是底层参数共享，即共享输入到中间层的参数，上层再分别对各个任务建模；一类是参数软共享，并不显式地共享底层参数，而是通过正则等对多个任务的参数进行相互约束。目前看到的比较多的是第一种，如下图 (a) 所示。而这种方式一般都假设多个任务的数据分布和目标都是相似的，当任务间差异变大时，对某些任务的预测性能就会产生较大的影响。然而实际任务的相关性都是难以度量的，因此效果实际上无法事先评估，只能靠不断尝试。</p>

<p>本文的作者受到 <strong>MoE</strong> 网络$^{[1]}$的启发，在多任务学习中引入 <strong>MoE</strong> 层，来显式的对多个任务的关系进行建模，或者理解成学习所有任务的不同方面；再对每个任务学习一个门限网络，这个门限网络可以理解成这个任务在各个方面的特点。整体结构如上图 (c) 所示。其中，每个共享的子网称为一个 <strong>Expert</strong>，文章中的 <strong>Expert</strong> 都使用前馈网络，它的输入是原始特征（也可以是一个共享的隐含层，直接使用原始特征效果会更好，但是维度可能过高），输出为各个 <strong>Gate</strong> 的权重分布（<strong>softmax</strong>），可以理解成是这个 <strong>Expert</strong> 对不同任务的影响程度。研究已经表明在 <strong>DNN</strong> 中，使用这种集成模型和集成子网络的方式有助于提高模型的性能。</p>
<p>文章在公开数据集和 Google 数据上进行了大量的对比实验，结果表明：</p>
<ol>
<li><strong>MMoE</strong> 在任务相关性变弱的情况下，性能影响较小，因此实用性也更强；</li>
<li><strong>MMoE</strong> 的训练误差收敛更快更稳定，即可训练性更好；这也与近年研究得出的结论一致，即 <em>Modulation and gating mechanisms can improve the trainability in training non-convex deep neural networks</em>。</li>
</ol>
<h3 id="参考文献"><a href="#参考文献" class="headerlink" title="参考文献"></a>参考文献</h3><ol>
<li>Jacobs, Robert A., et al. “<strong>Adaptive Mixtures of Local Experts.</strong>” <em>Neural Computation</em> 3.1(1991):79-87.</li>
<li><strong>keras-mmoe</strong>: <a href="https://github.com/drawbridge/keras-mmoe" target="_blank" rel="noopener">https://github.com/drawbridge/keras-mmoe</a>.</li>
</ol>

          
        
      
    </div>
    
    
    

    

    

    

    <footer class="post-footer">
      

      

      

      
      
        <div class="post-eof"></div>
      
    </footer>
  </div>
  
  
  
  </article>


    
      

  

  
  
  

  <article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
  
  
  
  <div class="post-block">
    <!-- Absolute permalink of this post; the /hcigmoid/ root appears once, matching the title link below. -->
    <link itemprop="mainEntityOfPage" href="http://guyuecanhui.gitee.io/hcigmoid/2019/11/09/paper-2018-ali-esmm/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="name" content="古月残辉">
      <meta itemprop="description" content>
      <meta itemprop="image" content="/hcigmoid/images/avatar.gif">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="HCigmoid">
    </span>

    
      <header class="post-header">

        
        
          <h1 class="post-title" itemprop="name headline">
                
                <a class="post-title-link" href="/hcigmoid/2019/11/09/paper-2018-ali-esmm/" itemprop="url">ESMM 论文精读</a></h1>
        

        <div class="post-meta">
          <span class="post-time">
            
              <span class="post-meta-item-icon">
                <i class="fa fa-calendar-o"></i>
              </span>
              
                <span class="post-meta-item-text">发表于</span>
              
              <time title="创建于" itemprop="dateCreated datePublished" datetime="2019-11-09T20:16:34+08:00">
                2019-11-09
              </time>
            

            

            
          </span>

          
            <span class="post-category">
            
              <span class="post-meta-divider">|</span>
            
              <span class="post-meta-item-icon">
                <i class="fa fa-folder-o"></i>
              </span>
              
                <span class="post-meta-item-text">分类于</span>
              
              
                <span itemprop="about" itemscope itemtype="http://schema.org/Thing">
                  <a href="/hcigmoid/categories/论文精读/" itemprop="url" rel="index">
                    <span itemprop="name">论文精读</span>
                  </a>
                </span>

                
                
              
            </span>
          

          
            
              <span class="post-comments-count">
                <span class="post-meta-divider">|</span>
                <span class="post-meta-item-icon">
                  <i class="fa fa-comment-o"></i>
                </span>
                <a href="/hcigmoid/2019/11/09/paper-2018-ali-esmm/#comments" itemprop="discussionUrl">
                  <span class="post-comments-count valine-comment-count" data-xid="/hcigmoid/2019/11/09/paper-2018-ali-esmm/" itemprop="commentCount"></span>
                </a>
              </span>
            
          

          
          

          

          
            <div class="post-wordcount">
              
                
                <span class="post-meta-item-icon">
                  <i class="fa fa-file-word-o"></i>
                </span>
                
                  <span class="post-meta-item-text">字数统计&#58;</span>
                
                <span title="字数统计">
                  1.2k
                </span>
              

              
                <span class="post-meta-divider">|</span>
              

              
                <span class="post-meta-item-icon">
                  <i class="fa fa-clock-o"></i>
                </span>
                
                  <span class="post-meta-item-text">阅读时长 &asymp;</span>
                
                <span title="阅读时长">
                  4
                </span>
              
            </div>
          

          

        </div>
      </header>
    

    
    
    
    <div class="post-body" itemprop="articleBody">

      
      

      
        
          <div class="note success"><p><strong>论文引用:</strong> Ma, Xiao, et al. “Entire space multi-task model: An effective approach for estimating post-click conversion rate.” The 41st International ACM SIGIR Conference on Research &amp; Development in Information Retrieval. ACM, 2018. </p></div>
<p>本文是阿里发表在 <strong>SIGIR 2018</strong> 年的短文，主要解决了精确预估 <strong>CVR</strong> 的问题。</p>
          <!--noindex-->
          <div class="post-button text-center">
            <a class="btn" href="/hcigmoid/2019/11/09/paper-2018-ali-esmm/#more" rel="contents">
              阅读全文 &raquo;
            </a>
          </div>
          <!--/noindex-->
        
      
    </div>
    
    
    

    

    

    

    <footer class="post-footer">
      

      

      

      
      
        <div class="post-eof"></div>
      
    </footer>
  </div>
  
  
  
  </article>


    
      

  

  
  
  

  <article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
  
  
  
  <div class="post-block">
    <!-- Absolute permalink of this post; the /hcigmoid/ root appears once and the space in the slug is percent-encoded. -->
    <link itemprop="mainEntityOfPage" href="http://guyuecanhui.gitee.io/hcigmoid/2019/11/09/paper-2018-ali-esmm%202/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="name" content="古月残辉">
      <meta itemprop="description" content>
      <meta itemprop="image" content="/hcigmoid/images/avatar.gif">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="HCigmoid">
    </span>

    
      <header class="post-header">

        
        
          <h1 class="post-title" itemprop="name headline">
                
                <a class="post-title-link" href="/hcigmoid/2019/11/09/paper-2018-ali-esmm 2/" itemprop="url">ESMM 论文精读</a></h1>
        

        <div class="post-meta">
          <span class="post-time">
            
              <span class="post-meta-item-icon">
                <i class="fa fa-calendar-o"></i>
              </span>
              
                <span class="post-meta-item-text">发表于</span>
              
              <time title="创建于" itemprop="dateCreated datePublished" datetime="2019-11-09T20:16:34+08:00">
                2019-11-09
              </time>
            

            

            
          </span>

          
            <span class="post-category">
            
              <span class="post-meta-divider">|</span>
            
              <span class="post-meta-item-icon">
                <i class="fa fa-folder-o"></i>
              </span>
              
                <span class="post-meta-item-text">分类于</span>
              
              
                <span itemprop="about" itemscope itemtype="http://schema.org/Thing">
                  <a href="/hcigmoid/categories/论文精读/" itemprop="url" rel="index">
                    <span itemprop="name">论文精读</span>
                  </a>
                </span>

                
                
              
            </span>
          

          
            
              <span class="post-comments-count">
                <span class="post-meta-divider">|</span>
                <span class="post-meta-item-icon">
                  <i class="fa fa-comment-o"></i>
                </span>
                <a href="/hcigmoid/2019/11/09/paper-2018-ali-esmm 2/#comments" itemprop="discussionUrl">
                  <span class="post-comments-count valine-comment-count" data-xid="/hcigmoid/2019/11/09/paper-2018-ali-esmm 2/" itemprop="commentCount"></span>
                </a>
              </span>
            
          

          
          

          

          
            <div class="post-wordcount">
              
                
                <span class="post-meta-item-icon">
                  <i class="fa fa-file-word-o"></i>
                </span>
                
                  <span class="post-meta-item-text">字数统计&#58;</span>
                
                <span title="字数统计">
                  1.2k
                </span>
              

              
                <span class="post-meta-divider">|</span>
              

              
                <span class="post-meta-item-icon">
                  <i class="fa fa-clock-o"></i>
                </span>
                
                  <span class="post-meta-item-text">阅读时长 &asymp;</span>
                
                <span title="阅读时长">
                  4
                </span>
              
            </div>
          

          

        </div>
      </header>
    

    
    
    
    <div class="post-body" itemprop="articleBody">

      
      

      
        
          
            <blockquote>
<p>论文引用: Ma, Xiao, et al. “Entire space multi-task model: An effective approach for estimating post-click conversion rate.” The 41st International ACM SIGIR Conference on Research &amp; Development in Information Retrieval. ACM, 2018.</p>
</blockquote>
<p>本文是阿里发表在 <strong>SIGIR 2018</strong> 年的短文，主要解决了精确预估 <strong>CVR</strong> 的问题。<strong>CVR</strong> 预估是最大化场景商品交易总额 (<strong>GMV</strong>=<code>流量×点击率×转化率×客单价</code>) 的重要因子，它可以用于 <strong>OCPC</strong> 模式下动态调整出价来使平台和广告主共同受益；并且从用户体验的角度来说，准确预估的 <strong>CVR</strong> 被用来平衡用户的点击偏好与购买偏好。文章认为当前的 <strong>CVR</strong> 预估主要存在两个问题：</p>
<ol>
<li><strong>Sample Selection Bias (SSB)</strong>：当前 <strong>CVR</strong> 预估是基于 <code>点击-&gt;转化</code> 数据进行训练的，而有点击的展示数据只是所有展示数据中的一小部分 (如下图所示)，这部分数据的分布与整体的分布通常并不一致。而在实际 serving 的时候，模型又是对整个空间中的所有样本进行预测，因此模型的泛化效果会受到影响。</li>
<li><strong>Data Sparsity (DS)</strong>：与前一个问题的根因相同，只使用点击数据会存在严重的数据稀疏问题。</li>
</ol>

<p>业界也提出过一些解决这两个问题的方案：</p>
<ol>
<li><strong>SSB Solution</strong>：<strong>AMAN 方法</strong> 将所有展示未点击的数据也作为负样本进行训练，但是这种方法天然会导致 CVR 被低估 (因为对于一些展示未点击的物品，可能是因为用户并没有关注到，或者用户已经点击了其他的条目而遗漏，并非是真正不会产生转化的物品)；<strong>无偏估计方法</strong> 通过拒绝采样的方法来保证预估的 CVR 与真实的观察一致，但是这种方法在计算过程中会除以一个很小的数，因此可能导致数值不稳定的问题。</li>
<li><strong>DS Solution</strong>：<strong>分层建模方法</strong>使用不同的特征构建多个预估模型，然后使用 <strong>LR</strong> 等模型将这些模型的结果汇总，这种方法需要比较可靠的先验知识来构建分层模型，在数据量大的推荐场景下难以实现；<strong>过采样方法</strong>将数据量少的类别样本进行过采样，但是对采样参数十分敏感。</li>
</ol>
<p>文章在已有工作的基础上，提出使用多任务学习的框架，使用所有 <code>展示-&gt;点击-&gt;转化</code> 数据进行训练，将 <strong>CVR</strong> 预测问题转变为同时预测 <strong>CTR</strong> 和 <strong>CTCVR</strong> 的问题。由于使用所有展示样本，因此不存在 <strong>SSB</strong> 问题；在多任务学习下共享 embedding 向量，实际上是一种参数迁移学习，可以有效的解决 <strong>DS</strong> 问题。</p>
<p>具体来讲，将一个样本记为 $(\boldsymbol{x},y\rightarrow z)$，其中，$\boldsymbol{x}$ 表示样本特征，$y$ 表示是否点击，$z$ 表示是否转化。则：</p>
<script type="math/tex; mode=display">
\begin{cases}
pCTCVR = p(z=1,y=1|\boldsymbol{x}) = pCTR\times pCVR \\
pCTR = p(y=1|\boldsymbol{x})\\
pCVR = p(z=1|\boldsymbol{x},y=1)
\end{cases}</script><p>由于这三个变量的自由度为 2，因此损失函数只需要计算其中两个即可。文章将损失函数设计为 <strong>CTR</strong> 和 <strong>CTCVR</strong> 的预测损失，如下所示：</p>
<script type="math/tex; mode=display">
L(\theta_{cvr},\theta_{ctr}) = \sum_{i=1}^N l(y_i, f(\boldsymbol{x}_i;\theta_{ctr})) + \sum_{i=1}^N l(y_i\&z_i, f(\boldsymbol{x}_i;\theta_{ctr})\times f(\boldsymbol{x}_i;\theta_{cvr}))</script><p>整体网络架构如下图所示：</p>

<p>可以看到，两个任务共享底层 embedding，同时通过顶层的 <strong>Dot</strong> 算子进行关联。文章没有将 <strong>pCVR</strong> 作为最终输出的结果，是因为 $pCVR = \frac{pCTCVR}{pCTR}$，如果将 <strong>pCVR</strong> 作为最终输出，则最后一步为除法算子，而除法具有数值不稳定性，可能会得出 $pCVR&gt;1$ 的情况，因此将 <strong>pCTCVR</strong> 作为最终输出的结果，这样能够保证 <strong>pCVR</strong> 的结果在 $[0,1]$ 范围内，避免了数值不稳定的问题。</p>
<p>文章在淘宝数据上与现有解决 <strong>SSB</strong> 和 <strong>DS</strong> 问题的几个策略进行了对比验证，发现基于 <strong>ESMM</strong> 模型的 <strong>CVR</strong> 和 <strong>CTCVR</strong> 预估任务的 <strong>AUC</strong> 是最高的。而且文章还发布了一个 mini 公开数据集，诚意满满~</p>
<h3 id="参考文献"><a href="#参考文献" class="headerlink" title="参考文献"></a>参考文献</h3><ol>
<li><strong>XDL ESMM</strong>: <a href="https://github.com/alibaba/x-deeplearning/tree/master/xdl-algorithm-solution/ESMM" target="_blank" rel="noopener">https://github.com/alibaba/x-deeplearning/tree/master/xdl-algorithm-solution/ESMM</a></li>
<li><strong>完整空间多任务模型：CVR预估的有效方法</strong>: <a href="http://xudongyang.coding.me/esmm/" target="_blank" rel="noopener">http://xudongyang.coding.me/esmm/</a></li>
<li><strong>构建分布式Tensorflow模型系列之CVR预估案例ESMM模型</strong>: <a href="http://xudongyang.coding.me/esmm-1/" target="_blank" rel="noopener">http://xudongyang.coding.me/esmm-1/</a></li>
</ol>

          
        
      
    </div>
    
    
    

    

    

    

    <footer class="post-footer">
      

      

      

      
      
        <div class="post-eof"></div>
      
    </footer>
  </div>
  
  
  
  </article>


    
      

  

  
  
  

  <article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
  
  
  
  <div class="post-block">
    <!-- Absolute permalink of this post; the /hcigmoid/ root appears once, matching the title link below. -->
    <link itemprop="mainEntityOfPage" href="http://guyuecanhui.gitee.io/hcigmoid/2019/08/28/jupyter-config/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="name" content="古月残辉">
      <meta itemprop="description" content>
      <meta itemprop="image" content="/hcigmoid/images/avatar.gif">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="HCigmoid">
    </span>

    
      <header class="post-header">

        
        
          <h1 class="post-title" itemprop="name headline">
                
                <a class="post-title-link" href="/hcigmoid/2019/08/28/jupyter-config/" itemprop="url">效率提升 10 倍的各种配置</a></h1>
        

        <div class="post-meta">
          <span class="post-time">
            
              <span class="post-meta-item-icon">
                <i class="fa fa-calendar-o"></i>
              </span>
              
                <span class="post-meta-item-text">发表于</span>
              
              <time title="创建于" itemprop="dateCreated datePublished" datetime="2019-08-28T22:36:04+08:00">
                2019-08-28
              </time>
            

            

            
          </span>

          
            <span class="post-category">
            
              <span class="post-meta-divider">|</span>
            
              <span class="post-meta-item-icon">
                <i class="fa fa-folder-o"></i>
              </span>
              
                <span class="post-meta-item-text">分类于</span>
              
              
                <span itemprop="about" itemscope itemtype="http://schema.org/Thing">
                  <a href="/hcigmoid/categories/安装部署/" itemprop="url" rel="index">
                    <span itemprop="name">安装部署</span>
                  </a>
                </span>

                
                
              
            </span>
          

          
            
              <span class="post-comments-count">
                <span class="post-meta-divider">|</span>
                <span class="post-meta-item-icon">
                  <i class="fa fa-comment-o"></i>
                </span>
                <a href="/hcigmoid/2019/08/28/jupyter-config/#comments" itemprop="discussionUrl">
                  <span class="post-comments-count valine-comment-count" data-xid="/hcigmoid/2019/08/28/jupyter-config/" itemprop="commentCount"></span>
                </a>
              </span>
            
          

          
          

          

          
            <div class="post-wordcount">
              
                
                <span class="post-meta-item-icon">
                  <i class="fa fa-file-word-o"></i>
                </span>
                
                  <span class="post-meta-item-text">字数统计&#58;</span>
                
                <span title="字数统计">
                  592
                </span>
              

              
                <span class="post-meta-divider">|</span>
              

              
                <span class="post-meta-item-icon">
                  <i class="fa fa-clock-o"></i>
                </span>
                
                  <span class="post-meta-item-text">阅读时长 &asymp;</span>
                
                <span title="阅读时长">
                  3
                </span>
              
            </div>
          

          

        </div>
      </header>
    

    
    
    
    <div class="post-body" itemprop="articleBody">

      
      

      
        
          <p>由于工作经常会更换机器、更换环境，有时候一个机器用惯了，换了一台机器都不记得自己之前是怎么配置的了。为了防止老年痴呆阻止我配置好看的工作环境，我决定把所有喜欢的配置都记录在这里，可能有点乱。</p>
          <!--noindex-->
          <div class="post-button text-center">
            <a class="btn" href="/hcigmoid/2019/08/28/jupyter-config/#more" rel="contents">
              阅读全文 &raquo;
            </a>
          </div>
          <!--/noindex-->
        
      
    </div>
    
    
    

    

    

    

    <footer class="post-footer">
      

      

      

      
      
        <div class="post-eof"></div>
      
    </footer>
  </div>
  
  
  
  </article>


    
      

  

  
  
  

  <article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
  
  
  
  <div class="post-block">
    <link itemprop="mainEntityOfPage" href="http://guyuecanhui.gitee.io/hcigmoid/2019/08/28/jupyter-config%202/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="name" content="古月残辉">
      <meta itemprop="description" content>
      <meta itemprop="image" content="/hcigmoid/images/avatar.gif">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="HCigmoid">
    </span>

    
      <header class="post-header">

        
        
          <h1 class="post-title" itemprop="name headline">
                
                <a class="post-title-link" href="/hcigmoid/2019/08/28/jupyter-config 2/" itemprop="url">效率提升 10 倍的各种配置</a></h1>
        

        <div class="post-meta">
          <span class="post-time">
            
              <span class="post-meta-item-icon">
                <i class="fa fa-calendar-o"></i>
              </span>
              
                <span class="post-meta-item-text">发表于</span>
              
              <time title="创建于" itemprop="dateCreated datePublished" datetime="2019-08-28T22:36:04+08:00">
                2019-08-28
              </time>
            

            

            
          </span>

          
            <span class="post-category">
            
              <span class="post-meta-divider">|</span>
            
              <span class="post-meta-item-icon">
                <i class="fa fa-folder-o"></i>
              </span>
              
                <span class="post-meta-item-text">分类于</span>
              
              
                <span itemprop="about" itemscope itemtype="http://schema.org/Thing">
                  <a href="/hcigmoid/categories/安装部署/" itemprop="url" rel="index">
                    <span itemprop="name">安装部署</span>
                  </a>
                </span>

                
                
              
            </span>
          

          
            
              <span class="post-comments-count">
                <span class="post-meta-divider">|</span>
                <span class="post-meta-item-icon">
                  <i class="fa fa-comment-o"></i>
                </span>
                <a href="/hcigmoid/2019/08/28/jupyter-config 2/#comments" itemprop="discussionUrl">
                  <span class="post-comments-count valine-comment-count" data-xid="/hcigmoid/2019/08/28/jupyter-config 2/" itemprop="commentCount"></span>
                </a>
              </span>
            
          

          
          

          

          
            <div class="post-wordcount">
              
                
                <span class="post-meta-item-icon">
                  <i class="fa fa-file-word-o"></i>
                </span>
                
                  <span class="post-meta-item-text">字数统计&#58;</span>
                
                <span title="字数统计">
                  592
                </span>
              

              
                <span class="post-meta-divider">|</span>
              

              
                <span class="post-meta-item-icon">
                  <i class="fa fa-clock-o"></i>
                </span>
                
                  <span class="post-meta-item-text">阅读时长 &asymp;</span>
                
                <span title="阅读时长">
                  3
                </span>
              
            </div>
          

          

        </div>
      </header>
    

    
    
    
    <div class="post-body" itemprop="articleBody">

      
      

      
        
          
            <p>由于工作经常会更换机器、更换环境，有时候一个机器用惯了，换了一台机器都不记得自己之前是怎么配置的了。为了防止老年痴呆阻止我配置好看的工作环境，我决定把所有喜欢的配置都记录在这里，可能有点乱。</p>
<h3 id="Jupyter-Notebook"><a href="#Jupyter-Notebook" class="headerlink" title="Jupyter Notebook"></a>Jupyter Notebook</h3><p>这个应该很常用了，大家第一件事应该就是设置主题吧，我试过各种主题，都无法满足我的诉求，所以就自己配置了一下 <code>~/.jupyter/custom/custom.css</code>，感觉下面这个配置简单又好看~</p>
<figure class="highlight css"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br><span class="line">45</span><br><span class="line">46</span><br><span class="line">47</span><br><span class="line">48</span><br><span class="line">49</span><br><span class="line">50</span><br><span class="line">51</span><br><span class="line">52</span><br><span class="line">53</span><br><span class="line">54</span><br><span class="line">55</span><br><span class="line">56</span><br><span class="line">57</span><br><span class="line">58</span><br><span class="line">59</span><br><span class="line">60</span><br><span 
class="line">61</span><br><span class="line">62</span><br><span class="line">63</span><br><span class="line">64</span><br><span class="line">65</span><br><span class="line">66</span><br><span class="line">67</span><br><span class="line">68</span><br><span class="line">69</span><br><span class="line">70</span><br><span class="line">71</span><br><span class="line">72</span><br><span class="line">73</span><br><span class="line">74</span><br><span class="line">75</span><br><span class="line">76</span><br><span class="line">77</span><br><span class="line">78</span><br><span class="line">79</span><br><span class="line">80</span><br><span class="line">81</span><br><span class="line">82</span><br><span class="line">83</span><br><span class="line">84</span><br><span class="line">85</span><br><span class="line">86</span><br><span class="line">87</span><br><span class="line">88</span><br><span class="line">89</span><br><span class="line">90</span><br><span class="line">91</span><br><span class="line">92</span><br></pre></td><td class="code"><pre><span class="line"><span class="selector-class">.introspection</span>, <span class="selector-class">.input_prompt</span>, <span class="selector-class">.output_prompt</span>, <span class="selector-class">.output</span>, <span class="selector-class">.CodeMirror</span> <span class="selector-tag">pre</span> &#123;</span><br><span class="line">    <span class="attribute">font-family</span>: <span class="string">"Microsoft YaHei Mono"</span>, Consolas, <span class="string">"Liberation Mono"</span>, Menlo, Courier, monospace;</span><br><span class="line">    <span class="attribute">font-size</span>: <span class="number">15px</span>;</span><br><span class="line">    <span class="attribute">line-height</span>: <span class="number">22px</span>;</span><br><span class="line">&#125;</span><br><span class="line"></span><br><span class="line"><span class="selector-tag">div</span><span class="selector-class">.output_area</span> <span 
class="selector-tag">pre</span> &#123;</span><br><span class="line"> <span class="attribute">font-family</span>: <span class="string">"Microsoft YaHei Mono"</span>;</span><br><span class="line">&#125;</span><br><span class="line"></span><br><span class="line"><span class="selector-tag">div</span><span class="selector-class">.text_cell</span>,</span><br><span class="line"><span class="selector-tag">div</span><span class="selector-class">.text_cell_render</span> <span class="selector-tag">pre</span>,</span><br><span class="line"><span class="selector-tag">div</span><span class="selector-class">.text_cell_render</span> &#123;</span><br><span class="line"> <span class="attribute">font-family</span>: sans-serif;</span><br><span class="line"> <span class="attribute">font-size</span>: <span class="number">11pt</span>;</span><br><span class="line"> <span class="attribute">line-height</span>: <span class="number">20pt</span>;</span><br><span class="line"> <span class="attribute">color</span>: <span class="number">#353535</span>;</span><br><span class="line">&#125;</span><br><span class="line"></span><br><span class="line"><span class="selector-tag">div</span><span class="selector-class">.rendered_html</span> <span class="selector-tag">code</span> &#123;</span><br><span class="line"> <span class="attribute">font-family</span>: <span class="string">"Microsoft YaHei Mono"</span>;</span><br><span class="line"> <span class="attribute">font-size</span>: <span class="number">11pt</span>;</span><br><span class="line"> <span class="attribute">padding-top</span>: <span class="number">3px</span>;</span><br><span class="line"> <span class="attribute">padding-left</span>: <span class="number">6px</span>;</span><br><span class="line"> <span class="attribute">padding-right</span>: <span class="number">6px</span>;</span><br><span class="line"> <span class="attribute">color</span>: <span class="number">#a3be8c</span>;</span><br><span class="line"> <span class="attribute">background</span>: 
<span class="number">#efefef</span>;</span><br><span class="line"> <span class="attribute">background-color</span>: <span class="number">#efefef</span>;</span><br><span class="line">&#125;</span><br><span class="line"></span><br><span class="line"><span class="selector-class">.rendered_html</span> <span class="selector-tag">thead</span> &#123;</span><br><span class="line"> <span class="attribute">font-family</span>: <span class="string">"Microsoft YaHei Mono"</span>;</span><br><span class="line"> <span class="attribute">font-size</span>: <span class="number">10.5pt</span>;</span><br><span class="line">&#125;</span><br><span class="line"></span><br><span class="line"><span class="selector-class">.rendered_html</span> <span class="selector-tag">td</span> &#123;</span><br><span class="line"> <span class="attribute">font-family</span>: <span class="string">"Microsoft YaHei Mono"</span>;</span><br><span class="line"> <span class="attribute">font-size</span>: <span class="number">10pt</span>;</span><br><span class="line">&#125;</span><br><span class="line"></span><br><span class="line"><span class="selector-class">.rendered_html</span> <span class="selector-tag">h1</span>,</span><br><span class="line"><span class="selector-class">.text_cell_render</span> <span class="selector-tag">h1</span> &#123;</span><br><span class="line"> <span class="attribute">color</span>: <span class="number">#126dce</span> <span class="meta">!important</span>;</span><br><span class="line"> <span class="attribute">font-size</span>: <span class="number">160%</span>;</span><br><span class="line"> <span class="attribute">text-align</span>: left;</span><br><span class="line"> <span class="attribute">font-style</span>: normal;</span><br><span class="line"> <span class="attribute">font-weight</span>: bold;</span><br><span class="line">&#125;</span><br><span class="line"><span class="selector-class">.rendered_html</span> <span class="selector-tag">h2</span>,</span><br><span class="line"><span 
class="selector-class">.text_cell_render</span> <span class="selector-tag">h2</span> &#123;</span><br><span class="line"> <span class="attribute">color</span>: <span class="number">#126dce</span> <span class="meta">!important</span>;</span><br><span class="line"> <span class="attribute">font-size</span>: <span class="number">140%</span>;</span><br><span class="line"> <span class="attribute">font-style</span>: normal;</span><br><span class="line"> <span class="attribute">font-weight</span>: bold;</span><br><span class="line">&#125;</span><br><span class="line"><span class="selector-class">.rendered_html</span> <span class="selector-tag">h3</span>,</span><br><span class="line"><span class="selector-class">.text_cell_render</span> <span class="selector-tag">h3</span> &#123;</span><br><span class="line"> <span class="attribute">color</span>: <span class="number">#126dce</span> <span class="meta">!important</span>;</span><br><span class="line"> <span class="attribute">font-size</span>: <span class="number">120%</span>;</span><br><span class="line"> <span class="attribute">font-style</span>: normal;</span><br><span class="line"> <span class="attribute">font-weight</span>: bold;</span><br><span class="line">&#125;</span><br><span class="line"></span><br><span class="line"><span class="selector-class">.cm-s-ipython</span> <span class="selector-class">.CodeMirror-linenumber</span> &#123;</span><br><span class="line"> <span class="attribute">font-family</span>: <span class="string">"Microsoft YaHei Mono"</span>;</span><br><span class="line">&#125;</span><br><span class="line"></span><br><span class="line"><span class="selector-class">.cm-s-ipython</span><span class="selector-class">.CodeMirror</span> &#123;<span class="attribute">background</span>: <span class="number">#2b303b</span>; <span class="attribute">color</span>: <span class="number">#dfe1e8</span>;&#125;</span><br><span class="line"><span class="selector-class">.cm-s-ipython</span> <span 
class="selector-tag">div</span><span class="selector-class">.CodeMirror-selected</span> &#123;<span class="attribute">background</span>: <span class="number">#343d46</span> <span class="meta">!important</span>;&#125;</span><br><span class="line"><span class="selector-class">.cm-s-ipython</span> <span class="selector-class">.CodeMirror-gutters</span> &#123;<span class="attribute">background</span>: <span class="number">#2b303b</span>; <span class="attribute">border-right</span>: <span class="number">0px</span>;&#125;</span><br><span class="line"><span class="selector-class">.cm-s-ipython</span> <span class="selector-class">.CodeMirror-linenumber</span> &#123;<span class="attribute">color</span>: <span class="number">#65737e</span>;&#125;</span><br><span class="line"><span class="selector-class">.cm-s-ipython</span> <span class="selector-class">.CodeMirror-cursor</span> &#123;<span class="attribute">border-left</span>: <span class="number">1px</span> solid <span class="number">#a7adba</span> <span class="meta">!important</span>;&#125;</span><br><span class="line"></span><br><span class="line"><span class="selector-class">.cm-s-ipython</span> <span class="selector-tag">span</span><span class="selector-class">.cm-comment</span> &#123;<span class="attribute">color</span>: <span class="number">#A3BE72</span>;&#125;</span><br><span class="line"><span class="selector-class">.cm-s-ipython</span> <span class="selector-tag">span</span><span class="selector-class">.cm-atom</span> &#123;<span class="attribute">color</span>: <span class="number">#b48ead</span>;&#125;</span><br><span class="line"><span class="selector-class">.cm-s-ipython</span> <span class="selector-tag">span</span><span class="selector-class">.cm-number</span> &#123;<span class="attribute">color</span>: <span class="number">#b48ead</span>;&#125;</span><br><span class="line"></span><br><span class="line"><span class="selector-class">.cm-s-ipython</span> <span class="selector-tag">span</span><span 
class="selector-class">.cm-property</span>, <span class="selector-class">.cm-s-ipython</span> <span class="selector-tag">span</span><span class="selector-class">.cm-attribute</span> &#123;<span class="attribute">color</span>: <span class="number">#c0c5ce</span>;&#125;</span><br><span class="line"><span class="selector-class">.cm-s-ipython</span> <span class="selector-tag">span</span><span class="selector-class">.cm-keyword</span> &#123;<span class="attribute">color</span>: <span class="number">#DDD7A3</span>;&#125;</span><br><span class="line"><span class="selector-class">.cm-s-ipython</span> <span class="selector-tag">span</span><span class="selector-class">.cm-string</span> &#123;<span class="attribute">color</span>: <span class="number">#94C273</span>;&#125;</span><br><span class="line"><span class="selector-class">.cm-s-ipython</span> <span class="selector-tag">span</span><span class="selector-class">.cm-operator</span> &#123;<span class="attribute">color</span>: <span class="number">#ab7967</span>;&#125;</span><br><span class="line"><span class="selector-class">.cm-s-ipython</span> <span class="selector-tag">span</span><span class="selector-class">.cm-builtin</span> &#123;<span class="attribute">color</span>: <span class="number">#EA8080</span>;&#125;</span><br><span class="line"></span><br><span class="line"><span class="selector-class">.cm-s-ipython</span> <span class="selector-tag">span</span><span class="selector-class">.cm-variable</span> &#123;<span class="attribute">color</span>: <span class="number">#c0c5ce</span>;&#125;</span><br><span class="line"><span class="selector-class">.cm-s-ipython</span> <span class="selector-tag">span</span><span class="selector-class">.cm-variable-2</span> &#123;<span class="attribute">color</span>: <span class="number">#8fa1b3</span>;&#125;</span><br><span class="line"><span class="selector-class">.cm-s-ipython</span> <span class="selector-tag">span</span><span class="selector-class">.cm-def</span> &#123;<span 
class="attribute">color</span>: <span class="number">#61AFEF</span>;&#125;</span><br><span class="line"><span class="selector-class">.cm-s-ipython</span> <span class="selector-tag">span</span><span class="selector-class">.cm-error</span> &#123;<span class="attribute">background</span>: <span class="number">#bf616a</span>; <span class="attribute">color</span>: <span class="number">#a7adba</span>;&#125;</span><br><span class="line"><span class="selector-class">.cm-s-ipython</span> <span class="selector-tag">span</span><span class="selector-class">.cm-bracket</span> &#123;<span class="attribute">color</span>: <span class="number">#c0c5ce</span>;&#125;</span><br><span class="line"><span class="selector-class">.cm-s-ipython</span> <span class="selector-tag">span</span><span class="selector-class">.cm-tag</span> &#123;<span class="attribute">color</span>: <span class="number">#bf616a</span>;&#125;</span><br><span class="line"><span class="selector-class">.cm-s-ipython</span> <span class="selector-tag">span</span><span class="selector-class">.cm-link</span> &#123;<span class="attribute">color</span>: <span class="number">#b48ead</span>;&#125;</span><br><span class="line"></span><br><span class="line"><span class="selector-class">.cm-s-ipython</span> <span class="selector-class">.CodeMirror-matchingbracket</span> &#123; <span class="attribute">text-decoration</span>: underline; <span class="attribute">color</span>: <span class="number">#dfe1e8</span> <span class="meta">!important</span>;&#125;</span><br></pre></td></tr></table></figure>
<p>另外，在 Linux 上安装 Jupyter Notebook 的话，第一次启动时会报错，需要修改以下文件：</p>

          
        
      
    </div>
    
    
    

    

    

    

    <footer class="post-footer">
      

      

      

      
      
        <div class="post-eof"></div>
      
    </footer>
  </div>
  
  
  
  </article>


    
      

  

  
  
  

  <article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
  
  
  
  <div class="post-block">
    <link itemprop="mainEntityOfPage" href="http://guyuecanhui.gitee.io/hcigmoid/2019/08/18/conjugate-priors-video-quality/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="name" content="古月残辉">
      <meta itemprop="description" content>
      <meta itemprop="image" content="/hcigmoid/images/avatar.gif">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="HCigmoid">
    </span>

    
      <header class="post-header">

        
        
          <h1 class="post-title" itemprop="name headline">
                
                <a class="post-title-link" href="/hcigmoid/2019/08/18/conjugate-priors-video-quality/" itemprop="url">例解共轭分布之视频质量评估</a></h1>
        

        <div class="post-meta">
          <span class="post-time">
            
              <span class="post-meta-item-icon">
                <i class="fa fa-calendar-o"></i>
              </span>
              
                <span class="post-meta-item-text">发表于</span>
              
              <time title="创建于" itemprop="dateCreated datePublished" datetime="2019-08-18T10:47:06+08:00">
                2019-08-18
              </time>
            

            

            
          </span>

          
            <span class="post-category">
            
              <span class="post-meta-divider">|</span>
            
              <span class="post-meta-item-icon">
                <i class="fa fa-folder-o"></i>
              </span>
              
                <span class="post-meta-item-text">分类于</span>
              
              
                <span itemprop="about" itemscope itemtype="http://schema.org/Thing">
                  <a href="/hcigmoid/categories/数学/" itemprop="url" rel="index">
                    <span itemprop="name">数学</span>
                  </a>
                </span>

                
                
              
            </span>
          

          
            
              <span class="post-comments-count">
                <span class="post-meta-divider">|</span>
                <span class="post-meta-item-icon">
                  <i class="fa fa-comment-o"></i>
                </span>
                <a href="/hcigmoid/2019/08/18/conjugate-priors-video-quality/#comments" itemprop="discussionUrl">
                  <span class="post-comments-count valine-comment-count" data-xid="/hcigmoid/2019/08/18/conjugate-priors-video-quality/" itemprop="commentCount"></span>
                </a>
              </span>
            
          

          
          

          

          
            <div class="post-wordcount">
              
                
                <span class="post-meta-item-icon">
                  <i class="fa fa-file-word-o"></i>
                </span>
                
                  <span class="post-meta-item-text">字数统计&#58;</span>
                
                <span title="字数统计">
                  2.6k
                </span>
              

              
                <span class="post-meta-divider">|</span>
              

              
                <span class="post-meta-item-icon">
                  <i class="fa fa-clock-o"></i>
                </span>
                
                  <span class="post-meta-item-text">阅读时长 &asymp;</span>
                
                <span title="阅读时长">
                  9
                </span>
              
            </div>
          

          

        </div>
      </header>
    

    
    
    
    <div class="post-body" itemprop="articleBody">

      
      

      
        
          <p>在推荐领域阅读文献的时候，我们常常会遇到共轭分布、共轭先验 (<strong>conjugate prior</strong>) 之类的概念。由于共轭这个翻译实在不太直观，因此这些概念也很难理解，我想结合两个视频推荐中的例子来尝试说明这些概念。今天先介绍视频质量评估的例子。</p>
          <!--noindex-->
          <div class="post-button text-center">
            <a class="btn" href="/hcigmoid/2019/08/18/conjugate-priors-video-quality/#more" rel="contents">
              阅读全文 &raquo;
            </a>
          </div>
          <!--/noindex-->
        
      
    </div>
    
    
    

    

    

    

    <footer class="post-footer">
      

      

      

      
      
        <div class="post-eof"></div>
      
    </footer>
  </div>
  
  
  
  </article>


    
      

  

  
  
  

  <article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
  
  
  
  <div class="post-block">
    <link itemprop="mainEntityOfPage" href="http://guyuecanhui.gitee.io/hcigmoid/2019/08/18/conjugate-priors-video-quality%202/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="name" content="古月残辉">
      <meta itemprop="description" content>
      <meta itemprop="image" content="/hcigmoid/images/avatar.gif">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="HCigmoid">
    </span>

    
      <header class="post-header">

        
        
          <h1 class="post-title" itemprop="name headline">
                
                <a class="post-title-link" href="/hcigmoid/2019/08/18/conjugate-priors-video-quality 2/" itemprop="url">例解共轭分布之视频质量评估</a></h1>
        

        <div class="post-meta">
          <span class="post-time">
            
              <span class="post-meta-item-icon">
                <i class="fa fa-calendar-o"></i>
              </span>
              
                <span class="post-meta-item-text">发表于</span>
              
              <time title="创建于" itemprop="dateCreated datePublished" datetime="2019-08-18T10:47:06+08:00">
                2019-08-18
              </time>
            

            

            
          </span>

          
            <span class="post-category">
            
              <span class="post-meta-divider">|</span>
            
              <span class="post-meta-item-icon">
                <i class="fa fa-folder-o"></i>
              </span>
              
                <span class="post-meta-item-text">分类于</span>
              
              
                <span itemprop="about" itemscope itemtype="http://schema.org/Thing">
                  <a href="/hcigmoid/categories/数学/" itemprop="url" rel="index">
                    <span itemprop="name">数学</span>
                  </a>
                </span>

                
                
              
            </span>
          

          
            
              <span class="post-comments-count">
                <span class="post-meta-divider">|</span>
                <span class="post-meta-item-icon">
                  <i class="fa fa-comment-o"></i>
                </span>
                <a href="/hcigmoid/2019/08/18/conjugate-priors-video-quality 2/#comments" itemprop="discussionUrl">
                  <span class="post-comments-count valine-comment-count" data-xid="/hcigmoid/2019/08/18/conjugate-priors-video-quality 2/" itemprop="commentCount"></span>
                </a>
              </span>
            
          

          
          

          

          
            <div class="post-wordcount">
              
                
                <span class="post-meta-item-icon">
                  <i class="fa fa-file-word-o"></i>
                </span>
                
                  <span class="post-meta-item-text">字数统计&#58;</span>
                
                <span title="字数统计">
                  2.6k
                </span>
              

              
                <span class="post-meta-divider">|</span>
              

              
                <span class="post-meta-item-icon">
                  <i class="fa fa-clock-o"></i>
                </span>
                
                  <span class="post-meta-item-text">阅读时长 &asymp;</span>
                
                <span title="阅读时长">
                  9
                </span>
              
            </div>
          

          

        </div>
      </header>
    

    
    
    
    <div class="post-body" itemprop="articleBody">

      
      

      
        
          
            <p>在推荐领域阅读文献的时候，我们常常会遇到共轭分布、共轭先验 (<strong>conjugate prior</strong>) 之类的概念。由于共轭这个翻译实在不太直观，因此这些概念也很难理解，我想结合两个视频推荐中的例子来尝试说明这些概念。今天先介绍视频质量评估的例子。</p>
<h3 id="问题背景"><a href="#问题背景" class="headerlink" title="问题背景"></a>问题背景</h3><p>如何评估一个视频的质量是视频推荐中非常重要但是又很让人头疼的事情。尤其是在短视频场景下，每天新增大量的短视频，我们需要迅速判断一个新短视频的质量：如果质量很好，我们可以将它向更多的人推荐；如果质量不好，我们可能不再主动推荐该视频。这里判断的时效很重要，因为如果没有及时发现一个垃圾视频，它可能就会通过推荐系统祸害很多的用户 =.=!</p>
<p>由于视频非常多，我们无法人工对每个视频进行准确的质量评估，而且用户在短视频的观看行为呈现出更加多元的兴趣，因此小编们也无法代表所有用户的口味。因此，要评估一个短视频好不好，还是得看它在用户中的表现 (可以用完播率、点赞率、分享率等统计指标来度量)。</p>
<p>假设我们只考虑用完播率 $r$ 来度量一个视频的质量 (后面交替使用完播率和视频质量)，它表示一个视频被播放完 (或者播放超过一定比例) 的数量 $m$ 与它展现给用户的次数 $n$ 的比例：</p>
<script type="math/tex; mode=display">
r=\frac{m}{n} \qquad(1)</script><p>这个指标的优点是计算非常方便，而且能够在一定程度上表达业务诉求。但是实际应用的时候，往往会因为视频的展示次数过少而对某个视频进行错误的评价。例如，视频 $v_1$ 只展示了 $10$ 次，有 $5$ 次完播；视频 $v_2$ 展示了 $10000$ 次，有 $4900$ 次完播，相较而言，$v_1$ 和 $v_2$ 哪个质量更好呢？</p>
<p>很难说！我们只是比较确信 $v_2$ 的完播率稳定在了 $49\%$ 左右，而对于 $v_1$ 的评估就非常不确定了：有可能它再展示 $10$ 次以后，一次都没人看；也有可能它再展示 $10$ 次每次都完播了。这两种情况下我们对视频质量的评估将发生非常大的变化。</p>
<h3 id="模型假设-先验分布"><a href="#模型假设-先验分布" class="headerlink" title="模型假设 (先验分布)"></a>模型假设 (先验分布)</h3><p>从根本上来讲，我们简单地用式 $(1)$ 来计算完播率，忽略了事件过少时的不确定性。为了引入这种不确定性，我们可以用一个概率分布来表示视频的质量 (这就是贝叶斯学派的观点，$r$ 并不是一个固定的值，而是服从一定的概率分布)，也就是说，给定 $m$ 和 $n$，我们要来估计这个视频的质量呈现一个什么样的分布。这个分布的形状是我们在看到数据之前<strong>根据经验</strong>去假定的，因此我们也叫它<strong>先验分布</strong>。</p>
<p>我们的直观想法是，如果一个视频的完播率为 $r=\frac{m}{n}$，那么它质量的真实分布 $\theta$ 中，概率最大的点也应该是 $\frac{m}{n}$，并且与 $\frac{m}{n}$ 相差越多概率也越小。</p>
<p>根据这个想法，我们可以用 <strong>Beta</strong> 分布来进行建模，将 $\alpha=m$ 和 $\beta=n-m$ 作为 <strong>Beta</strong> 分布的参数 (<strong>Beta</strong> 分布的详细介绍可以参考 Wiki)。在我们的例子中，随着 $n$ 的增加，<strong>Beta</strong> 分布的概率密度越来越集中于 $r=\frac{m}{n}$。下图表示随着 $m$ 和 $n$ 变化，保持 $r=0.5$ 不变的情况下，<strong>Beta</strong> 分布的概率密度函数：</p>

<p>可以看到，当 $n$ 很小的时候，视频的质量是高度不确定的；而当 $n$ 很大的时候，视频的质量已经集中分布于 $r=\frac{m}{n}$ 附近了。因此，我们选的这个先验分布是能够满足我们的直观想法和假设要求的。这样，我们用式 $(2)$ 来代替式 $(1)$ 对视频质量进行初步的评估：</p>
<script type="math/tex; mode=display">
\begin{align}
p(\theta;\alpha,\beta)=Beta(\theta;\alpha,\beta)
&=\frac{\Gamma(\alpha+\beta)}{\Gamma(\alpha)\Gamma(\beta)}\theta^{\alpha-1}(1-\theta)^{\beta-1}\\
&=\frac{1}{B(\alpha,\beta)}\theta^{\alpha-1}(1-\theta)^{\beta-1}
\end{align} \qquad(2)</script><p>其中，$\Gamma(n)=(n-1)!$ 表示伽玛函数 (这个场景下参数都为整数)；$B(\alpha,\beta)$ 可以看成是归一化项，使得所有概率累加和为 $1$。</p>
<h3 id="更新模型-后验分布"><a href="#更新模型-后验分布" class="headerlink" title="更新模型 (后验分布)"></a>更新模型 (后验分布)</h3><p>问题才解决了一半，由于视频在不断的推荐给用户，我们的统计数据也在发生变化。因此另外一个至关重要的问题是，我们怎么根据新增的数据来更新我们对视频质量的评估。</p>
<p>例如，对于某个视频 $v$，假设我们已经收集到一些反馈数据，并统计出 $\alpha=m$，$\beta=n-m$，我们根据式 $(2)$ 对视频质量分布 $\theta$ 有了一个初步的估计。现在我们又将这个视频推荐给其他用户，并想看看放量以后，视频质量评估是否准确。假设这个视频又展示了 $b$ 次，完播了 $a$ 次，则我们根据先验假设，视频的质量应该是围绕 $r'=\frac{m+a}{n+b}$ 的钟形分布，并且比之前的分布更陡峭一些。为了实现这个过程，我们需要对模型进行更新。</p>
<p>由于用户的反馈只包含完播和未完播两类，因此很容易想到用二项分布的似然估计来估计这 $b$ 次展示中有 $a$ 次会完播的概率：</p>
<script type="math/tex; mode=display">
p(x=a|\theta)=C_b^a\theta^{a}(1-\theta)^{(b-a)} \qquad(3)</script><p>由于式 $(3)$ 中的 $\theta$ 实际上是满足式 $(2)$ 中的分布 (<em>注意，这里 $\theta$ 虽然仍然是一个分布，但是我们在这一步假设它是已知的</em>)，代入后可以算出 $a$ 次完播的概率为 $\theta$ 取所有可能值时式 $(3)$ 的积分：</p>
<script type="math/tex; mode=display">
p(x=a)=\int_0^1 p(x=a;\theta)p(\theta)d\theta \qquad(4)</script><p>这里我们要用最基础的贝叶斯公式，来基于初始的视频质量评估和增量收集来的统计数据，去修正我们在式 $(2)$ 中做出的视频质量评估，得到一个更加可靠的估计。贝叶斯公式如下：</p>
<script type="math/tex; mode=display">
p(\theta;X)=\frac{p(X;\theta)p(\theta)}{p(X)} \qquad(5)</script><p>其中，$p(\theta)$ 是我们对这个视频质量的初始评估，即先验分布，用式 $(2)$ 来计算；$p(X;\theta)$ 表示我们基于初始的评估结果，进一步估计事件 $X$ 发生的概率，即似然估计，用式 $(3)$ 来计算；$p(X)$ 表示 $\theta$ 取不同值时事件 $X$ 发生的概率之和，主要是用于做归一化，用式 $(4)$ 来计算；$p(\theta;X)$ 则表示基于先验分布和似然估计，得到的后验分布。</p>
<p>全部代入后，我们可以得到下面的简单推导：</p>
<script type="math/tex; mode=display">
\begin{align}
p(\theta;x=a)
&=\frac{p(x=a;\theta)p(\theta)}{\int_0^1 p(x=a;\theta)p(\theta)d(\theta)}\\
&=\frac{C_b^a\theta^{a}(1-\theta)^{(b-a)} \frac{1}{B(\alpha,\beta)}\theta^{\alpha-1}(1-\theta)^{\beta-1}}{\int_0^1 C_b^a\theta^{a}(1-\theta)^{(b-a)}\frac{1}{B(\alpha,\beta)}\theta^{\alpha-1}(1-\theta)^{\beta-1}d\theta}\\
&=\frac{\theta^{\alpha+a-1}(1-\theta)^{\beta+b-a-1}}{\int_0^1\theta^{\alpha+a-1}(1-\theta)^{\beta+b-a-1}d\theta}\\
&=\frac{1}{B(\alpha+a,\beta+b-a)}\theta^{\alpha+a-1}(1-\theta)^{\beta+b-a-1}\\
&=Beta(\theta;\alpha+a,\beta+b-a)
\end{align} \qquad(6)</script><p>也就是说，经过了这一轮的推荐以后，我们对这个视频的质量评估仅仅使模型的参数发生了变化，而模型的形式不变，仍然为 <strong>Beta</strong> 分布！至此，我们终于触及本文的核心概念：共轭性。</p>
<blockquote>
<p> <strong>模型的先验分布与后验分布具有相同的函数形式，这个性质就叫做共轭性。</strong></p>
</blockquote>
<h3 id="共轭性"><a href="#共轭性" class="headerlink" title="共轭性"></a>共轭性</h3><p>共轭性给我们带来了什么样的好处呢？比较式 $(6)$ 和式 $(2)$，我们发现，在观察到 $b$ 次推荐中有 $a$ 次完播事件后，我们可以简单的将模型从 $Beta(\theta;\alpha,\beta)$ 更新为 $Beta(\theta;\alpha+a,\beta+b-a)$，即我们只需要更新如下模型参数：</p>
<script type="math/tex; mode=display">
\begin{cases}\begin{align}
\alpha&=\alpha+a \\
\beta&=\beta+b-a
\end{align}\end{cases}</script><p>它的最大意义在于简化了模型更新的过程，使得模型更新的实时性得到了保证。</p>
<p>一开始，我们基于先验知识对视频质量进行建模，但是由于数据量较少，我们对视频质量的估计置信度较低；随着用户反馈的数据越来越多，我们可以直接基于这些新增的数据去快速更新模型的参数；随着参数数值的增大，我们对视频质量的估计置信度越来越高，直到我们已经有充足的把握认定这个视频是不是高质量视频。</p>
<p>这里的置信度还体现在，当数据量较少的时候，少量的观测结果就会导致我们对视频质量评估发生巨大的变化；而当数据量充足的时候，即使再收集到很多数据，也很难改变我们的评估。</p>
<h3 id="Take-aways"><a href="#Take-aways" class="headerlink" title="Take-aways"></a>Take-aways</h3><p>至此，我们用视频质量评估的例子说明了共轭性和共轭分布是什么含义。一些关键点总结如下：</p>
<ol>
<li>共轭性是指模型的先验分布和后验分布有相同的形式，满足共轭性的分布称为共轭分布。例如：<strong>Beta</strong> 分布与二项分布是共轭分布，且 <strong>Beta</strong> 分布是 $\theta$ 的共轭先验；</li>
<li>共轭性极大的方便了我们基于增量观测的数据对模型进行更新；</li>
<li>在推导共轭性的时候，我们使用了贝叶斯公式，总结起来就是：后验分布=先验分布*似然函数/归一化因子；</li>
</ol>

          
        
      
    </div>
    
    
    

    

    

    

    <footer class="post-footer">
      

      

      

      
      
        <div class="post-eof"></div>
      
    </footer>
  </div>
  
  
  
  </article>


    
      

  

  
  
  

  <article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
  
  
  
  <div class="post-block">
    <link itemprop="mainEntityOfPage" href="http://guyuecanhui.gitee.io/hcigmoid/2019/08/10/feature-selection-kendall%202/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="name" content="古月残辉">
      <meta itemprop="description" content>
      <meta itemprop="image" content="/hcigmoid/images/avatar.gif">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="HCigmoid">
    </span>

    
      <header class="post-header">

        
        
          <h1 class="post-title" itemprop="name headline">
                
                <a class="post-title-link" href="/hcigmoid/2019/08/10/feature-selection-kendall 2/" itemprop="url">常用的特征选择方法之 Kendall 秩相关系数</a></h1>
        

        <div class="post-meta">
          <span class="post-time">
            
              <span class="post-meta-item-icon">
                <i class="fa fa-calendar-o"></i>
              </span>
              
                <span class="post-meta-item-text">发表于</span>
              
              <time title="创建于" itemprop="dateCreated datePublished" datetime="2019-08-10T22:44:41+08:00">
                2019-08-10
              </time>
            

            

            
          </span>

          
            <span class="post-category">
            
              <span class="post-meta-divider">|</span>
            
              <span class="post-meta-item-icon">
                <i class="fa fa-folder-o"></i>
              </span>
              
                <span class="post-meta-item-text">分类于</span>
              
              
                <span itemprop="about" itemscope itemtype="http://schema.org/Thing">
                  <a href="/hcigmoid/categories/特征工程/" itemprop="url" rel="index">
                    <span itemprop="name">特征工程</span>
                  </a>
                </span>

                
                
              
            </span>
          

          
            
              <span class="post-comments-count">
                <span class="post-meta-divider">|</span>
                <span class="post-meta-item-icon">
                  <i class="fa fa-comment-o"></i>
                </span>
                <a href="/hcigmoid/2019/08/10/feature-selection-kendall 2/#comments" itemprop="discussionUrl">
                  <span class="post-comments-count valine-comment-count" data-xid="/hcigmoid/2019/08/10/feature-selection-kendall 2/" itemprop="commentCount"></span>
                </a>
              </span>
            
          

          
          

          

          
            <div class="post-wordcount">
              
                
                <span class="post-meta-item-icon">
                  <i class="fa fa-file-word-o"></i>
                </span>
                
                  <span class="post-meta-item-text">字数统计&#58;</span>
                
                <span title="字数统计">
                  888
                </span>
              

              
                <span class="post-meta-divider">|</span>
              

              
                <span class="post-meta-item-icon">
                  <i class="fa fa-clock-o"></i>
                </span>
                
                  <span class="post-meta-item-text">阅读时长 &asymp;</span>
                
                <span title="阅读时长">
                  3
                </span>
              
            </div>
          

          

        </div>
      </header>
    

    
    
    
    <div class="post-body" itemprop="articleBody">

      
      

      
        
          
            <p>前面我们已经讨论了 <a href="https://guyuecanhui.github.io/2019/07/20/feature-selection-pearson/" target="_blank" rel="noopener"><strong>Pearson</strong> 相关系数</a>和 <a href="https://guyuecanhui.github.io/2019/07/28/feature-selection-spearman/" target="_blank" rel="noopener"><strong>Spearman</strong> 秩相关系数</a>，它们可以检测连续变量间的相关性，并且 <strong>Spearman</strong> 秩相关系数还能够检测有序的离散变量间的相关系数。今天我们再讨论一个能够检测有序变量相关性的系数：<strong>Kendall</strong> 秩相关系数。这里有序变量既包括实数变量，也包括可以排序的类别变量，比如名次、年龄段等。</p>
<h3 id="Kendall-秩相关系数的定义"><a href="#Kendall-秩相关系数的定义" class="headerlink" title="Kendall 秩相关系数的定义"></a>Kendall 秩相关系数的定义</h3><p><strong>Kendall</strong> 秩相关系数是一个非参数性质（与分布无关）的秩统计参数，是用来度量两个<strong>有序变量</strong>之间<strong>单调关系</strong>强弱的相关系数，它的取值范围是 $[-1,1]$，绝对值越大，表示单调相关性越强，取值为 $0$ 时表示完全不相关。</p>
<p>原始的 <strong>Kendall</strong> 秩相关系数定义在<strong>一致对</strong> (<strong>concordant pairs</strong>) 和<strong>分歧对</strong> (<strong>discordant pairs</strong>) 的概念上。所谓一致对，就是两个变量取值的相对关系一致；分歧对则是指它们的相对关系不一致。这么说有点难以理解，我们举个例子。</p>
<p>假设我们为很多不同年龄的用户推送了一条社保相关的视频，然后回收了这些用户的播放完成度，如下表所示：</p>

<p>我们想用 <strong>Kendall</strong> 秩相关系数来分析用户年龄与该社保视频的播放情况是否相关。为此，我们将年龄和播放完成度分别排序后，对样本中取值进行排序和编号，分别得到 <code>年龄序号</code> 和 <code>播放序号</code>。这时，对于样本 $3$ 和样本 $4$，它们的年龄序号是 $[3,4]$，播放序号是 $[2,4]$，虽然序号不同，但是变化趋势是相同的，因此它们是一致的；对于样本 $2$ 和样本 $3$，它们的年龄序号是 $[2,3]$，播放序号是 $[5,2]$，它们的变化趋势是相反的，因此它们是分歧的。</p>
<p>进一步的，我们观察可以发现，当样本已经按年龄升序排列后，对于每个样本，我们可以简单的数一下该样本后续样本中播放序号大于该样本的样本数量，作为该样本引入的一致对数 (该样本之前的样本与该样本也可能一致，但是已经算过一次了)，将所有样本引入的一致对数加起来就能得到所有样本的一致对数，记为 $c$。</p>
<p>同样的，对于每个样本，我们可以简单的数一下该样本后续样本中播放序号小于该样本的样本数量，作为该样本引入的分歧对数，累加后得到所有样本的分歧对数，记为 $d$。</p>
<p>则原始的 <strong>Kendall</strong> 秩相关系数定义为：</p>
<script type="math/tex; mode=display">
\tau_a=\frac{c-d}{c+d}=\frac{c-d}{\frac{1}{2}\cdot n\cdot (n-1)}\qquad (1)</script><p>其中，$m=\frac{n\cdot (n-1)}{2}$ 表示所有样本两两组合的数量，在变量没有重复取值的情况下，$m=c+d$。定义 $(1)$ 也被称为 <a href="https://en.wikipedia.org/wiki/Kendall_rank_correlation_coefficient" target="_blank" rel="noopener"><strong>Tau-a</strong></a>，从定义也容易看出，它不能处理变量有相同取值的情况。</p>
<p>为了处理变量有相同取值的情况，我们还要将每个变量中相同取值的数量考虑进来，从而得到扩展的定义：</p>
<script type="math/tex; mode=display">
\tau_b=\frac{c-d}{\sqrt{(c+d+t_x)(c+d+t_y)}}\qquad (2)</script><p>其中，$c$ 在计算的时候只能算 <script type="math/tex">a_i<a_j</script> 且 <script type="math/tex">b_i<b_j</script> 的对数，$d$ 也只能算 <script type="math/tex">a_i<a_j</script> 且 <script type="math/tex">b_i>b_j</script> 的对数 (<script type="math/tex">i<j</script>)；$t_x$，$t_y$ 分别表示变量 $x$，$y$ 取值中序号相同的样本对数排除共同平局的部分 (在下一小节举例说明)。式 <script type="math/tex">(2)</script> 通常又被称为 <strong>Tau-b</strong>，是实际中应用最广泛的定义 (另外还有 <strong>Tau-c</strong> 的变种这里就不介绍了)。在 <a href="https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.kendalltau.html" target="_blank" rel="noopener"><strong>scipy 1.3.0</strong></a> 版本的实现中，同时支持式 $(1)$ 和式 $(2)$。</p>
<h3 id="举例说明"><a href="#举例说明" class="headerlink" title="举例说明"></a>举例说明</h3><p>首先，我们根据式 $(1)$ 算一下图 $1$ 中年龄与播放的相关度。</p>
<ol>
<li>将样本按年龄升序排列，将播放完成度按从小到大的顺序编号，如图 $1$ 所示；</li>
<li>分别计算每个样本新引入的一致对数和分歧对数，如图 $1$ 所示，进而算出 $c=40$，$d=5$；</li>
<li>根据式 $(1)$ 得到 $\tau_a=\frac{40-5}{40+5}=0.778$；</li>
</ol>
<p>因此，年龄与播放社保视频的时长呈现强相关性，基于这个分析我们就可以尝试对更多年龄大一些的用户推送此视频。</p>
<p>在工程实现的时候，用户的年龄通常会被划分成不同的区间，而播放完成度只有超过一定阈值 (如 $0.3$) 我们才算作有效播放。因此，图 $1$ 的数据我们又可以转换成下面的离散情况：</p>

<p>可以发现，年龄段序号和有效播放序号存在大量的重复数据，因此我们基于式 $(2)$ 来计算：</p>
<ol>
<li>将样本按年龄段升序排列，相同的年龄段按是否有效播放排序，对年龄段和是否有效播放进行编号，如图 $2$ 所示；</li>
<li>计算每个样本引入的一致对数和分歧对数，如图 $2$ 所示 (例如样本 $4$ 与 样本 $8\sim 10$ 一致)，进而算出 $c=21$，$d=0$；</li>
<li>计算公共平局的数量 $t_c$，公共平局是指 $a_i=a_j$ 且 $b_i=b_j$ 的情况 (例如样本 $1\sim 3$ 互为平局，样本 $4\sim 6$ 互为平局，样本 $8,9$ 互为平局)，根据图 $2$ 易知：$t_c=\frac{3\cdot (3-1)}{2}+\frac{3\cdot (3-1)}{2}+\frac{2\cdot (2-1)}{2}=7$；</li>
<li>计算只在年龄段平局的数量 $t_x=\frac{3\cdot (3-1)}{2}+\frac{4\cdot (4-1)}{2}+\frac{2\cdot (2-1)}{2}-t_c=10-7=3$；</li>
<li>计算只在有效播放平局的数量 $t_y=\frac{6\cdot (6-1)}{2}+\frac{4\cdot (4-1)}{2}-t_c=21-7=14$；</li>
<li>根据式 $(2)$ 得到 $\tau_b=\frac{21}{\sqrt{(21+3)(21+14)}}=0.725$；</li>
</ol>
<p>对比发现，离散化后，我们发现这两个因素之间仍然是强相关的。</p>
<blockquote>
<p>附示例的 python 代码<br><figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br></pre></td><td class="code"><pre><span class="line"><span class="meta">&gt;&gt;&gt; </span><span class="keyword">from</span> scipy.stats <span class="keyword">import</span> kendalltau</span><br><span class="line"><span class="meta">&gt;&gt;&gt; </span><span class="keyword">import</span> numpy <span class="keyword">as</span> np</span><br><span class="line"><span class="meta">&gt;&gt;&gt; </span>x=[<span class="number">1</span>,<span class="number">2</span>,<span class="number">3</span>,<span class="number">4</span>,<span class="number">5</span>,<span class="number">6</span>,<span class="number">7</span>,<span class="number">8</span>,<span class="number">9</span>,<span class="number">10</span>]</span><br><span class="line"><span class="meta">&gt;&gt;&gt; </span>y=[<span class="number">1</span>,<span class="number">5</span>,<span class="number">2</span>,<span class="number">4</span>,<span class="number">3</span>,<span class="number">7</span>,<span class="number">6</span>,<span class="number">8</span>,<span class="number">9</span>,<span class="number">10</span>]</span><br><span class="line"><span class="meta">&gt;&gt;&gt; </span>kendalltau(x,y)</span><br><span class="line">(<span class="number">0.7777777777777779</span>, <span class="number">0.0017451191944018172</span>)</span><br><span class="line"><span class="meta">&gt;&gt;&gt; </span>x=[<span class="number">1</span>,<span class="number">1</span>,<span class="number">1</span>,<span class="number">2</span>,<span class="number">2</span>,<span class="number">2</span>,<span class="number">2</span>,<span 
class="number">3</span>,<span class="number">3</span>,<span class="number">4</span>]</span><br><span class="line"><span class="meta">&gt;&gt;&gt; </span>y=[<span class="number">1</span>,<span class="number">1</span>,<span class="number">1</span>,<span class="number">1</span>,<span class="number">1</span>,<span class="number">1</span>,<span class="number">2</span>,<span class="number">2</span>,<span class="number">2</span>,<span class="number">2</span>]</span><br><span class="line"><span class="meta">&gt;&gt;&gt; </span>kendalltau(x,y)</span><br><span class="line">(<span class="number">0.72456883730947197</span>, <span class="number">0.0035417200011750309</span>)</span><br></pre></td></tr></table></figure></p>
<p>其中，<code>kendalltau</code> 返回的第二个结果是 p-value，其具体含义可参考<a href="https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.kendalltau.html" target="_blank" rel="noopener">官方文档</a>。</p>
</blockquote>
<h3 id="Take-aways"><a href="#Take-aways" class="headerlink" title="Take-aways"></a>Take-aways</h3><ol>
<li><strong>Kendall</strong> 秩相关系数可以用于度量有序变量间相关性，只要求变量取值之间可比，对变量的分布和数据的距离不作假设；</li>
<li>能用 <strong>Pearson</strong> 相关系数和 <strong>Spearman</strong> 秩相关系数的地方都能用 <strong>Kendall</strong> 秩相关系数，但是 <strong>Spearman</strong> 和 <strong>Kendall</strong> 秩相关系数要对数据排序，复杂度远高于 <strong>Pearson</strong> 相关系数，因此能用 <strong>Pearson</strong> 相关系数的时候优先考虑 <strong>Pearson</strong> 相关系数；</li>
<li><strong>Kendall</strong> 秩相关系数依赖一致对和分歧对的计数，这里需要注意数据中是否有重复取值的情况，来选择使用 <strong>Tau-a</strong> 还是 <strong>Tau-b</strong> 进行计算。</li>
</ol>
<hr>
<blockquote>
<h4 id="这是特征选择系列文章的第三篇，其他文章可参考："><a href="#这是特征选择系列文章的第三篇，其他文章可参考：" class="headerlink" title="这是特征选择系列文章的第三篇，其他文章可参考："></a>这是特征选择系列文章的第三篇，其他文章可参考：</h4><ol>
<li><a href="https://guyuecanhui.github.io/2019/07/20/feature-selection-pearson/" target="_blank" rel="noopener">常用的特征选择方法之 Pearson 相关系数</a></li>
<li><a href="https://guyuecanhui.github.io/2019/07/28/feature-selection-spearman/" target="_blank" rel="noopener">常用的特征选择方法之 Spearman 相关系数</a></li>
<li><a href="https://guyuecanhui.github.io/2019/08/10/feature-selection-kendall/" target="_blank" rel="noopener">常用的特征选择方法之 Kendall 秩相关系数</a></li>
</ol>
</blockquote>

          
        
      
    </div>
    
    
    

    

    

    

    <footer class="post-footer">
      

      

      

      
      
        <div class="post-eof"></div>
      
    </footer>
  </div>
  
  
  
  </article>


    
      

  

  
  
  

  <article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
  
  
  
  <div class="post-block">
    <link itemprop="mainEntityOfPage" href="http://guyuecanhui.gitee.io/hcigmoid/2019/08/10/feature-selection-kendall/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="name" content="古月残辉">
      <meta itemprop="description" content>
      <meta itemprop="image" content="/hcigmoid/images/avatar.gif">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="HCigmoid">
    </span>

    
      <header class="post-header">

        
        
          <h1 class="post-title" itemprop="name headline">
                
                <a class="post-title-link" href="/hcigmoid/2019/08/10/feature-selection-kendall/" itemprop="url">常用的特征选择方法之 Kendall 秩相关系数</a></h1>
        

        <div class="post-meta">
          <span class="post-time">
            
              <span class="post-meta-item-icon">
                <i class="fa fa-calendar-o"></i>
              </span>
              
                <span class="post-meta-item-text">发表于</span>
              
              <time title="创建于" itemprop="dateCreated datePublished" datetime="2019-08-10T22:44:41+08:00">
                2019-08-10
              </time>
            

            

            
          </span>

          
            <span class="post-category">
            
              <span class="post-meta-divider">|</span>
            
              <span class="post-meta-item-icon">
                <i class="fa fa-folder-o"></i>
              </span>
              
                <span class="post-meta-item-text">分类于</span>
              
              
                <span itemprop="about" itemscope itemtype="http://schema.org/Thing">
                  <a href="/hcigmoid/categories/特征工程/" itemprop="url" rel="index">
                    <span itemprop="name">特征工程</span>
                  </a>
                </span>

                
                
              
            </span>
          

          
            
              <span class="post-comments-count">
                <span class="post-meta-divider">|</span>
                <span class="post-meta-item-icon">
                  <i class="fa fa-comment-o"></i>
                </span>
                <a href="/hcigmoid/2019/08/10/feature-selection-kendall/#comments" itemprop="discussionUrl">
                  <span class="post-comments-count valine-comment-count" data-xid="/hcigmoid/2019/08/10/feature-selection-kendall/" itemprop="commentCount"></span>
                </a>
              </span>
            
          

          
          

          

          
            <div class="post-wordcount">
              
                
                <span class="post-meta-item-icon">
                  <i class="fa fa-file-word-o"></i>
                </span>
                
                  <span class="post-meta-item-text">字数统计&#58;</span>
                
                <span title="字数统计">
                  888
                </span>
              

              
                <span class="post-meta-divider">|</span>
              

              
                <span class="post-meta-item-icon">
                  <i class="fa fa-clock-o"></i>
                </span>
                
                  <span class="post-meta-item-text">阅读时长 &asymp;</span>
                
                <span title="阅读时长">
                  3
                </span>
              
            </div>
          

          

        </div>
      </header>
    

    
    
    
    <div class="post-body" itemprop="articleBody">

      
      

      
        
          <p>前面我们已经讨论了 <a href="https://guyuecanhui.github.io/2019/07/20/feature-selection-pearson/" target="_blank" rel="noopener"><strong>Pearson</strong> 相关系数</a>和 <a href="https://guyuecanhui.github.io/2019/07/28/feature-selection-spearman/" target="_blank" rel="noopener"><strong>Spearman</strong> 秩相关系数</a>，它们可以检测连续变量间的相关性，并且 <strong>Spearman</strong> 秩相关系数还能够检测有序的离散变量间的相关系数。今天我们再讨论一个能够检测有序变量相关性的系数：<strong>Kendall</strong> 秩相关系数。这里有序变量既包括实数变量，也包括可以排序的类别变量，比如名次、年龄段等。</p>
          <!--noindex-->
          <div class="post-button text-center">
            <a class="btn" href="/hcigmoid/2019/08/10/feature-selection-kendall/#more" rel="contents">
              阅读全文 &raquo;
            </a>
          </div>
          <!--/noindex-->
        
      
    </div>
    
    
    

    

    

    

    <footer class="post-footer">
      

      

      

      
      
        <div class="post-eof"></div>
      
    </footer>
  </div>
  
  
  
  </article>


    
  </section>

  
  <!-- Pagination for the post index. The prev/next links contain only an icon,
       so each carries an aria-label; the icons themselves are hidden from
       assistive technology, and the current page is flagged with aria-current.
       The links stay on one line so no inter-element whitespace is introduced. -->
  <nav class="pagination" aria-label="分页导航">
    <a class="extend prev" rel="prev" href="/hcigmoid/page/2/" aria-label="上一页"><i class="fa fa-angle-left" aria-hidden="true"></i></a><a class="page-number" href="/hcigmoid/">1</a><a class="page-number" href="/hcigmoid/page/2/">2</a><span class="page-number current" aria-current="page">3</span><a class="page-number" href="/hcigmoid/page/4/">4</a><span class="space">&hellip;</span><a class="page-number" href="/hcigmoid/page/7/">7</a><a class="extend next" rel="next" href="/hcigmoid/page/4/" aria-label="下一页"><i class="fa fa-angle-right" aria-hidden="true"></i></a>
  </nav>



          </div>
          


          

        </div>
        
          
  
  <div class="sidebar-toggle">
    <div class="sidebar-toggle-line-wrap">
      <span class="sidebar-toggle-line sidebar-toggle-line-first"></span>
      <span class="sidebar-toggle-line sidebar-toggle-line-middle"></span>
      <span class="sidebar-toggle-line sidebar-toggle-line-last"></span>
    </div>
  </div>

  <aside id="sidebar" class="sidebar">
    
    <div class="sidebar-inner">

      

      

      <section class="site-overview-wrap sidebar-panel sidebar-panel-active">
        <div class="site-overview">
          <div class="site-author motion-element" itemprop="author" itemscope itemtype="http://schema.org/Person">
            
              <p class="site-author-name" itemprop="name">古月残辉</p>
              <p class="site-description motion-element" itemprop="description">总结心得</p>
          </div>

          <nav class="site-state motion-element">

            
              <div class="site-state-item site-state-posts">
              
                <a href="/hcigmoid/archives/">
              
                  <span class="site-state-item-count">62</span>
                  <span class="site-state-item-name">日志</span>
                </a>
              </div>
            

            
              
              
              <div class="site-state-item site-state-categories">
                <a href="/hcigmoid/categories/index.html">
                  <span class="site-state-item-count">6</span>
                  <span class="site-state-item-name">分类</span>
                </a>
              </div>
            

            
              
              
              <div class="site-state-item site-state-tags">
                <a href="/hcigmoid/tags/index.html">
                  <span class="site-state-item-count">70</span>
                  <span class="site-state-item-name">标签</span>
                </a>
              </div>
            

          </nav>

          
            <div class="feed-link motion-element">
              <a href="/hcigmoid/atom.xml" rel="alternate">
                <i class="fa fa-rss"></i>
                RSS
              </a>
            </div>
          

          
            <div class="links-of-author motion-element">
                
                  <span class="links-of-author-item">
                    <a href="mailto:guyuecanhui@icloud.com" target="_blank" title="E-Mail">
                      
                        <i class="fa fa-fw fa-envelope"></i>E-Mail</a>
                  </span>
                
            </div>
          

          
          

          
          

          

        </div>
      </section>

      

      

    </div>
  </aside>


        
      </div>
    </main>

    <footer id="footer" class="footer">
      <div class="footer-inner">
        <div class="copyright">&copy; 2018 &mdash; <span itemprop="copyrightYear">2020</span>
  <span class="with-love">
    <i class="fa fa-user"></i>
  </span>
  <span class="author" itemprop="copyrightHolder">古月残辉</span>

  
    <span class="post-meta-divider">|</span>
    <span class="post-meta-item-icon">
      <i class="fa fa-area-chart"></i>
    </span>
    
      <span class="post-meta-item-text">站点总字数&#58;</span>
    
    <span title="站点总字数">105.3k</span>
  
</div>









<script async src="//busuanzi.ibruce.info/busuanzi/2.3/busuanzi.pure.mini.js"></script>

        
<!-- Visitor counters filled in by busuanzi.pure.mini.js, which is already
     included once immediately before this block (busuanzi.ibruce.info).
     The second include that used to sit here pointed at the retired
     dn-lbstatics.qbox.me host and was redundant, so it has been dropped. -->
<div class="busuanzi-count">

  
    <span class="site-uv">
      <i class="fa fa-user"></i> 访问人数
      <span class="busuanzi-value" id="busuanzi_value_site_uv"></span>
      人
    </span>
  

  
    <span class="site-pv">
      <i class="fa fa-eye"></i> 总访问量
      <span class="busuanzi-value" id="busuanzi_value_site_pv"></span>
      次
    </span>
  
</div>








        
      </div>
    </footer>

    
      <div class="back-to-top">
        <i class="fa fa-arrow-up"></i>
        
      </div>
    

    

  </div>

  

<script type="text/javascript">
  // Reset `window.Promise` to null whenever its internal class tag is anything
  // other than '[object Function]' (i.e. it is not a callable constructor).
  if ('[object Function]' !== Object.prototype.toString.call(window.Promise)) {
    window.Promise = null;
  }
</script>









  












  
  
    <script type="text/javascript" src="/hcigmoid/lib/jquery/index.js?v=2.1.3"></script>
  

  
  
    <script type="text/javascript" src="/hcigmoid/lib/fastclick/lib/fastclick.min.js?v=1.0.6"></script>
  

  
  
    <script type="text/javascript" src="/hcigmoid/lib/jquery_lazyload/jquery.lazyload.js?v=1.9.7"></script>
  

  
  
    <script type="text/javascript" src="/hcigmoid/lib/velocity/velocity.min.js?v=1.2.1"></script>
  

  
  
    <script type="text/javascript" src="/hcigmoid/lib/velocity/velocity.ui.min.js?v=1.2.1"></script>
  

  
  
    <script type="text/javascript" src="/hcigmoid/lib/fancybox/source/jquery.fancybox.pack.js?v=2.1.5"></script>
  


  


  <script type="text/javascript" src="/hcigmoid/js/src/utils.js?v=5.1.4"></script>

  <script type="text/javascript" src="/hcigmoid/js/src/motion.js?v=5.1.4"></script>



  
  


  <script type="text/javascript" src="/hcigmoid/js/src/affix.js?v=5.1.4"></script>

  <script type="text/javascript" src="/hcigmoid/js/src/schemes/pisces.js?v=5.1.4"></script>



  

  


  <script type="text/javascript" src="/hcigmoid/js/src/bootstrap.js?v=5.1.4"></script>



  


  




	





  





  










  <script src="//cdn1.lncld.net/static/js/3.0.4/av-min.js"></script>
  <script src="//unpkg.com/valine/dist/Valine.min.js"></script>
  
  <script type="text/javascript">
    // Whitelist of guest-info field names accepted for the `guest_info` option.
    var GUEST = ['nick','mail','link'];
    // Configured field list, split on commas and restricted to whitelisted names.
    var guest = 'nick,mail,link'.split(',').filter(function (field) {
      return GUEST.indexOf(field) !== -1;
    });
    // Instantiate the Valine comment widget against the `#comments` element.
    new Valine({
        el: '#comments',
        verify: false,
        notify: false,
        appId: '6du4Ppc2TvUuhcccRHSDNH2v-gzGzoHsz',
        appKey: 'zOKNml4W1Bq3OTzEuLt5hUjI',
        placeholder: '感谢阅读！欢迎评论！',
        avatar: 'mm',
        guest_info: guest,
        // The configured value is the (truthy) string '10', so the numeric
        // fallback that followed `||` could never be selected.
        pageSize: '10'
    });
  </script>



  

  <script type="text/javascript">
    // Flipped to true by the search-index request's success callback.
    var isfetched = false;
    // Path of the search index relative to the site root; an empty value
    // falls back to the default "search.xml".
    var search_path = "search.xml";
    if (!search_path.length) {
      search_path = "search.xml";
    }
    // The index is treated as XML unless its path ends in "json" (any case).
    var isXml = !/json$/i.test(search_path);
    // Full URL path of the index under the site root.
    var path = "/hcigmoid/" + search_path;
    // monitor main search box;

    // Click handler that dismisses the search popup: hides `.popup`, empties
    // the query box, removes any rendered result list / "no result" node and
    // the overlay, then clears the inline `overflow` style on <body>.
    var onPopupClose = function (e) {
      $('.popup').hide();
      $('#local-search-input').val('');
      $('.search-result-list, #no-result, .local-search-pop-overlay').remove();
      $('body').css('overflow', '');
    };

    // Shows the search popup: appends a dimming overlay to <body> (and sets
    // its overflow to hidden), closes on overlay click, toggles `.popup`, and
    // focuses the query input with autocapitalize/autocorrect turned off.
    function proceedsearch() {
      var $overlay = $('<div class="search-popup-overlay local-search-pop-overlay"></div>');
      $("body").append($overlay).css('overflow', 'hidden');
      // Bind by selector (not just to $overlay) so every matching overlay
      // currently in the document gets the close handler.
      $('.search-popup-overlay').click(onPopupClose);
      $('.popup').toggle();
      $('#local-search-input')
        .attr({ autocapitalize: "none", autocorrect: "off" })
        .focus();
    }

    // search function;
    var searchFunc = function(path, search_id, content_id) {
      'use strict';

      // start loading animation
      $("body")
        .append('<div class="search-popup-overlay local-search-pop-overlay">' +
          '<div id="search-loading-icon">' +
          '<i class="fa fa-spinner fa-pulse fa-5x fa-fw"></i>' +
          '</div>' +
          '</div>')
        .css('overflow', 'hidden');
      $("#search-loading-icon").css('margin', '20% auto 0 auto').css('text-align', 'center');

      $.ajax({
        url: path,
        dataType: isXml ? "xml" : "json",
        async: true,
        success: function(res) {
          // get the contents from search data
          isfetched = true;
          $('.popup').detach().appendTo('.header-inner');
          var datas = isXml ? $("entry", res).map(function() {
            return {
              title: $("title", this).text(),
              content: $("content",this).text(),
              url: $("url" , this).text()
            };
          }).get() : res;
          var input = document.getElementById(search_id);
          var resultContent = document.getElementById(content_id);
          var inputEventFunction = function() {
            var searchText = input.value.trim().toLowerCase();
            var keywords = searchText.split(/[\s\-]+/);
            if (keywords.length > 1) {
              keywords.push(searchText);
            }
            var resultItems = [];
            if (searchText.length > 0) {
              // perform local searching
              datas.forEach(function(data) {
                var isMatch = false;
                var hitCount = 0;
                var searchTextCount = 0;
                var title = data.title.trim();
                var titleInLowerCase = title.toLowerCase();
                var content = data.content.trim().replace(/<[^>]+>/g,"");
                var contentInLowerCase = content.toLowerCase();
                var articleUrl = decodeURIComponent(data.url);
                var indexOfTitle = [];
                var indexOfContent = [];
                // only match articles with not empty titles
                if(title != '') {
                  keywords.forEach(function(keyword) {
                    function getIndexByWord(word, text, caseSensitive) {
                      var wordLen = word.length;
                      if (wordLen === 0) {
                        return [];
                      }
                      var startPosition = 0, position = [], index = [];
                      if (!caseSensitive) {
                        text = text.toLowerCase();
                        word = word.toLowerCase();
                      }
                      while ((position = text.indexOf(word, startPosition)) > -1) {
                        index.push({position: position, word: word});
                        startPosition = position + wordLen;
                      }
                      return index;
                    }

                    indexOfTitle = indexOfTitle.concat(getIndexByWord(keyword, titleInLowerCase, false));
                    indexOfContent = indexOfContent.concat(getIndexByWord(keyword, contentInLowerCase, false));
                  });
                  if (indexOfTitle.length > 0 || indexOfContent.length > 0) {
                    isMatch = true;
                    hitCount = indexOfTitle.length + indexOfContent.length;
                  }
                }

                // show search results

                if (isMatch) {
                  // sort index by position of keyword

                  [indexOfTitle, indexOfContent].forEach(function (index) {
                    index.sort(function (itemLeft, itemRight) {
                      if (itemRight.position !== itemLeft.position) {
                        return itemRight.position - itemLeft.position;
                      } else {
                        return itemLeft.word.length - itemRight.word.length;
                      }
                    });
                  });

                  // merge hits into slices

                  /**
                   * Consume hits from the tail of `index` that fit inside [start, end] and
                   * bundle them into one slice descriptor.
                   *
                   * `index` is mutated: every hit that is accepted, or that overlaps an
                   * accepted hit, is popped off its end. The number of accepted hits whose
                   * word equals the enclosing `searchText` is also added to the enclosing
                   * `searchTextCount`.
                   *
                   * @param {string} text - not read by this function.
                   * @param {number} start - slice start offset, copied into the result.
                   * @param {number} end - slice end offset; a hit is accepted only if it ends at or before it.
                   * @param {Array<{position: number, word: string}>} index - hits, with the next
                   *   hit to process at the end of the array; must not be empty.
                   * @returns {{hits: Array<{position: number, length: number}>, start: number,
                   *   end: number, searchTextCount: number}}
                   */
                  function mergeIntoSlice(text, start, end, index) {
                    var top = index[index.length - 1];
                    var hitStart = top.position;
                    var hitWord = top.word;
                    var sliceHits = [];
                    var exactCount = 0;
                    var hitEnd;
                    for (;;) {
                      hitEnd = hitStart + hitWord.length;
                      if (!(hitEnd <= end) || index.length == 0) {
                        break;
                      }
                      if (hitWord === searchText) {
                        exactCount += 1;
                      }
                      sliceHits.push({position: hitStart, length: hitWord.length});

                      // drop the accepted hit, then every following hit that starts before it ends
                      index.pop();
                      while (index.length != 0) {
                        top = index[index.length - 1];
                        hitStart = top.position;
                        hitWord = top.word;
                        if (hitEnd > hitStart) {
                          index.pop();
                          continue;
                        }
                        break;
                      }
                    }
                    searchTextCount += exactCount;
                    return {
                      hits: sliceHits,
                      start: start,
                      end: end,
                      searchTextCount: exactCount
                    };
                  }

                  var slicesOfTitle = [];
                  if (indexOfTitle.length != 0) {
                    slicesOfTitle.push(mergeIntoSlice(title, 0, title.length, indexOfTitle));
                  }

                  var slicesOfContent = [];
                  while (indexOfContent.length != 0) {
                    var item = indexOfContent[indexOfContent.length - 1];
                    var position = item.position;
                    var word = item.word;
                    // cut out 100 characters
                    var start = position - 20;
                    var end = position + 80;
                    if(start < 0){
                      start = 0;
                    }
                    if (end < position + word.length) {
                      end = position + word.length;
                    }
                    if(end > content.length){
                      end = content.length;
                    }
                    slicesOfContent.push(mergeIntoSlice(content, start, end, indexOfContent));
                  }

                  // sort slices in content by search text's count and hits' count

                  slicesOfContent.sort(function (sliceLeft, sliceRight) {
                    if (sliceLeft.searchTextCount !== sliceRight.searchTextCount) {
                      return sliceRight.searchTextCount - sliceLeft.searchTextCount;
                    } else if (sliceLeft.hits.length !== sliceRight.hits.length) {
                      return sliceRight.hits.length - sliceLeft.hits.length;
                    } else {
                      return sliceLeft.start - sliceRight.start;
                    }
                  });

                  // select top N slices in content

                  var upperBound = parseInt('1');
                  if (upperBound >= 0) {
                    slicesOfContent = slicesOfContent.slice(0, upperBound);
                  }

                  // highlight title and content

                  /**
                   * Render the [slice.start, slice.end) part of `text` as an HTML string in
                   * which every hit is wrapped in <b class="search-keyword">.
                   *
                   * NOTE(review): the text pieces are concatenated into markup without any
                   * HTML escaping.
                   *
                   * @param {string} text - full text the slice offsets refer to.
                   * @param {{start: number, end: number,
                   *   hits: Array<{position: number, length: number}>}} slice - hits are
                   *   emitted in array order.
                   * @returns {string} markup for the slice.
                   */
                  function highlightKeyword(text, slice) {
                    var pieces = [];
                    var cursor = slice.start;
                    var hitList = slice.hits;
                    for (var n = 0; n < hitList.length; n++) {
                      var hitStart = hitList[n].position;
                      var hitStop = hitStart + hitList[n].length;
                      // plain text up to the hit, then the highlighted hit itself
                      pieces.push(text.substring(cursor, hitStart));
                      pieces.push('<b class="search-keyword">' + text.substring(hitStart, hitStop) + '</b>');
                      cursor = hitStop;
                    }
                    // trailing plain text after the last hit
                    pieces.push(text.substring(cursor, slice.end));
                    return pieces.join('');
                  }

                  var resultItem = '';

                  if (slicesOfTitle.length != 0) {
                    resultItem += "<li><a href='" + articleUrl + "' class='search-result-title'>" + highlightKeyword(title, slicesOfTitle[0]) + "</a>";
                  } else {
                    resultItem += "<li><a href='" + articleUrl + "' class='search-result-title'>" + title + "</a>";
                  }

                  slicesOfContent.forEach(function (slice) {
                    resultItem += "<a href='" + articleUrl + "'>" +
                      "<p class=\"search-result\">" + highlightKeyword(content, slice) +
                      "...</p>" + "</a>";
                  });

                  resultItem += "</li>";
                  resultItems.push({
                    item: resultItem,
                    searchTextCount: searchTextCount,
                    hitCount: hitCount,
                    id: resultItems.length
                  });
                }
              })
            };
            if (keywords.length === 1 && keywords[0] === "") {
              resultContent.innerHTML = '<div id="no-result"><i class="fa fa-search fa-5x" /></div>'
            } else if (resultItems.length === 0) {
              resultContent.innerHTML = '<div id="no-result"><i class="fa fa-frown-o fa-5x" /></div>'
            } else {
              resultItems.sort(function (resultLeft, resultRight) {
                if (resultLeft.searchTextCount !== resultRight.searchTextCount) {
                  return resultRight.searchTextCount - resultLeft.searchTextCount;
                } else if (resultLeft.hitCount !== resultRight.hitCount) {
                  return resultRight.hitCount - resultLeft.hitCount;
                } else {
                  return resultRight.id - resultLeft.id;
                }
              });
              var searchResultList = '<ul class=\"search-result-list\">';
              resultItems.forEach(function (result) {
                searchResultList += result.item;
              })
              searchResultList += "</ul>";
              resultContent.innerHTML = searchResultList;
            }
          }

          if ('auto' === 'auto') {
            input.addEventListener('input', inputEventFunction);
          } else {
            $('.search-icon').click(inputEventFunction);
            input.addEventListener('keypress', function (event) {
              if (event.keyCode === 13) {
                inputEventFunction();
              }
            });
          }

          // remove loading animation
          $(".local-search-pop-overlay").remove();
          $('body').css('overflow', '');

          proceedsearch();
        }
      });
    }

    // Wire up the search popup: open trigger, close button, click containment, Esc key.

    // Opening: load the search data on first use, otherwise go straight to the popup.
    $('.popup-trigger').click(function (evt) {
      evt.stopPropagation();
      if (isfetched !== false) {
        proceedsearch();
        return;
      }
      searchFunc(path, 'local-search-input', 'local-search-result');
    });

    // Closing via the dedicated close button.
    $('.popup-btn-close').click(onPopupClose);

    // Clicks inside the popup are not propagated to ancestor handlers.
    $('.popup').click(function (evt) {
      evt.stopPropagation();
    });

    // Closing via the Esc key (key code 27), but only while the popup is visible.
    $(document).on('keyup', function (evt) {
      if (evt.which !== 27) {
        return;
      }
      if ($('.search-popup').is(':visible')) {
        onPopupClose();
      }
    });
  </script>





  

  

  

  
  

  
  
    <!--
      MathJax configuration script (type "text/x-mathjax-config"): calls
      MathJax.Hub.Config with settings for the tex2jax preprocessor.
        inlineMath     : inline math is delimited by $...$ or by \(...\).
        processEscapes : enabled; per the MathJax v2 tex2jax documentation this lets
                         \$ stand for a literal dollar sign (confirm against the
                         MathJax version actually served by the loader).
        skipTags       : element names handed to tex2jax as its list of tags to skip.
    -->
    <script type="text/x-mathjax-config">
      MathJax.Hub.Config({
        tex2jax: {
          inlineMath: [ ['$','$'], ["\\(","\\)"]  ],
          processEscapes: true,
          skipTags: ['script', 'noscript', 'style', 'textarea', 'pre', 'code']
        }
      });
    </script>

    <script type="text/x-mathjax-config">
      // Queue a callback on MathJax.Hub that appends the "has-jax" class to the
      // parent node of the source element of every jax returned by getAllJax().
      MathJax.Hub.Queue(function() {
        var jaxList = MathJax.Hub.getAllJax();
        var k = 0;
        while (k < jaxList.length) {
          var host = jaxList[k].SourceElement().parentNode;
          host.className = host.className + ' has-jax';
          k += 1;
        }
      });
    </script>
    <!-- MathJax loader. The URL uses an explicit https: scheme instead of a
         protocol-relative one, so this third-party script is never fetched over
         plain HTTP, even when the page itself is served over HTTP. -->
    <script type="text/javascript" src="https://cdn.bootcss.com/mathjax/2.7.1/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>
  


  

  

</body>
</html>
